# Row of the test set to visualize; other candidate rows: 9, 50, 2, 7, 11, 13, 33
row_num = 9
# Decode the padded integer sequence back to text, then split into tokens so
# each word can be paired with its attribution color.
decoded_text = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0]
words = decoded_text.split()
colors = colorize(attrs[0, row_num])
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
# NOTE(review): despite the "Predicted label" wording, this prints the TRUE
# test label — y_nums_test / y_seq_test come from the train/test split, not
# from the model.
print('Predicted label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
Predicted label = 1: dementia
# Render the transcript with per-word attribution highlighting; hlstr
# presumably wraps each (word, color) pair in an HTML span — defined elsewhere.
HTML("".join(list(map(hlstr, words, colors))))
from keras.models import Sequential
from keras.layers import Embedding, Dropout,Conv1D, Flatten, MaxPooling1D, Dense, LSTM,Conv1D,MaxPooling1D, Bidirectional
# from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
# from keras.constraints import maxnorm
from keras.preprocessing import sequence
from keras.callbacks import History
# from keras.preprocessing.text import Tokenizer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
# from tensorflow.keras.preprocessing.text import Tokenizer
# from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from nltk.corpus import stopwords
from gensim import utils
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix
from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
import nltk
nltk.download('punkt')
[nltk_data] Downloading package punkt to /root/nltk_data... [nltk_data] Package punkt is already up-to-date!
True
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
# Load transcripts; expected columns are 'text' and 'label'
# ('dementia' / 'control') — see their use in the split below.
talk_bank_small = pd.read_csv("talk_bank_cookie.csv")
# talk_bank_small = talk_bank_small[talk_bank_small['text'].notna()]
# part_1 = talk_bank[talk_bank.label == 'dementia'].sample(frac = 0.35)
# part_2 = talk_bank[talk_bank.label == 'control'].sample(frac = 1.0)
# talk_bank_small = pd.concat([part_1, part_2], axis = 0)
# talk_bank_small.to_csv("C:\\Users\\cobus\\Desktop\\Thesis\\talk_bank_small.csv")
# talk_bank_small = pd.read_csv("talk_bank_small.csv")
# Text_INPUT_DIM=100
# text_model=None
# filename='docEmbeddings_5_clean.d2v'
# if os.path.isfile(filename):
# text_model = Doc2Vec.load(filename)
# else:
# text_model = Doc2Vec(min_count=1, window=5, size=Text_INPUT_DIM, sample=1e-4, negative=5, workers=4, iter=5,seed=1)
# text_model.build_vocab(sentences)
# text_model.train(sentences, total_examples=text_model.corpus_count, epochs=text_model.iter)
# text_model.save(filename
# tag_d = np.array(train_new.question_text)
# tagged_data = [TaggedDocument(words=word_tokenize(d.lower()),
# tags=[str(i)]) for i, d in enumerate(tag_d)]
# def tokenize_text(text):
# tokens = []
# for sent in nltk.sent_tokenize(text):
# for word in nltk.word_tokenize(sent):
# if len(word) < 2:
# continue
# tokens.append(word.lower())
# return tokens
# def w2v_tokenize_text(text):
# tokens = []
# for sent in nltk.sent_tokenize(text):
# for word in nltk.word_tokenize(sent):
# if len(word) < 2:
# continue
# tokens.append(word)
# return tokens
# train_data, test_data = train_test_split(talk_bank, test_size=0.1, random_state=42)
# train_tagged = train_data.apply(lambda r: TaggedDocument(words=tokenize_text(talk_bank['text']), tags=[r.tag]), axis=1)
# test_tagged = test_data.apply(lambda r: TaggedDocument(words=tokenize_text(talk_bank['text']), tags=[r.tag]), axis=1)
# test_tagged.values[50]
#Splitting data
# 90/10 train/test split, stratified so both splits keep the original
# dementia/control class ratio.
X_seq_train, X_seq_test, y_seq_train, y_seq_test = train_test_split(talk_bank_small['text'], talk_bank_small['label'], test_size = 0.1, random_state = 100, stratify = talk_bank_small['label'])
# y_nums_train = [int(y =='dementia') for y in y_seq_train]
# Binary training target: 1 = dementia, 0 = control.
y_nums_train = (y_seq_train == 'dementia').astype(int)
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize
import numpy as np
from keras.preprocessing.text import Tokenizer
# Sample data: list of sentences
# Fit a Keras tokenizer on the raw training transcripts; its word_index /
# index_word mappings are reused below to align doc2vec word embeddings with
# the integer sequences fed to the networks.
documents = X_seq_train
tokenizer = Tokenizer()
tokenizer.fit_on_texts(documents)
def embedding_for_vocab(source_embeddings, word_index,
                        embedding_dim, extra_rows=1000, verbose=True):
    """Build an embedding weight matrix aligned with a Keras ``word_index``.

    Parameters
    ----------
    source_embeddings : mapping of word -> vector (e.g. gensim KeyedVectors).
        Must support ``word in source_embeddings`` and indexing by word.
    word_index : dict of word -> int index. Keras tokenizer indices are
        1-based; index 0 is reserved for padding.
    embedding_dim : int
        Width of each embedding vector.
    extra_rows : int, default 1000
        Extra all-zero rows appended after the vocabulary. The default
        preserves the original hard-coded padding of the matrix (the
        Embedding layer's input_dim is later taken from this shape).
    verbose : bool, default True
        If True, print how many vocabulary words were found in
        ``source_embeddings`` (original behavior).

    Returns
    -------
    numpy.ndarray of shape (len(word_index) + 1 + extra_rows, embedding_dim)
        Row ``i`` holds the vector for the word with index ``i``; row 0 and
        words missing from ``source_embeddings`` stay all-zero.
    """
    # +1 because index 0 is reserved for padding and word_index is 1-based.
    vocab_size = len(word_index) + 1
    embedding_matrix_vocab = np.zeros((vocab_size + extra_rows, embedding_dim))
    words_found = 0
    for word, i in word_index.items():
        if word in source_embeddings:
            words_found += 1
            embedding_matrix_vocab[i] = source_embeddings[word]
    if verbose:
        print(words_found)
    return embedding_matrix_vocab
# Tokenize each document via the fitted Keras tokenizer and tag it with its
# positional index for Doc2Vec training.
def _doc_words(doc):
    # Round-trip through the tokenizer so the TaggedDocument words match the
    # tokenizer vocabulary exactly (same normalization, same OOV handling).
    seq = tokenizer.texts_to_sequences([doc.lower()])[0]
    return [tokenizer.index_word[idx] for idx in seq]

tagged_data = [TaggedDocument(words=_doc_words(doc), tags=[str(i)])
               for i, doc in enumerate(documents)]
model = Doc2Vec(vector_size=500,  # Size of the embedding vector
                window=2,         # Maximum distance between current and predicted word
                min_count=1,      # Ignores all words with total frequency lower than this
                workers=4,        # Number of worker threads to train the model
                dm=0,             # dm=0 selects PV-DBOW (NOT distributed memory, which is dm=1)
                dbow_words=1,     # also train word vectors alongside document vectors
                epochs=40)        # Number of training iterations over the corpus
model.build_vocab(tagged_data)
model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)
# Extracting embeddings (the embedding matrix for all documents)
# model.wv holds the word vectors trained because dbow_words=1; the 500 here
# must match vector_size above.
embedding_matrix = embedding_for_vocab(model.wv, tokenizer.word_index, 500)
# for i in range(len(tagged_data)):
# embedding_matrix[i] = model.dv[str(i)] # Use model.dv (document vectors) to get embeddings
# # embedding_matrix now contains the embeddings for each document
# print("Embedding Matrix Shape:", embedding_matrix.shape)
# # To get the embedding of a new document
# new_document = "A completely new sentence to encode"
# new_doc_vector = model.infer_vector(word_tokenize(new_document.lower()))
# print("New Document Vector:", new_doc_vector)
1587
# Binary test target: 1 = dementia, 0 = control (mirrors y_nums_train).
y_nums_test = (y_seq_test == 'dementia').astype(int)
# tokenizer = Tokenizer(num_words = 2500)
# tokenizer.fit_on_texts(X_seq_train)
# Convert texts to integer sequences using the tokenizer fitted on the
# training split only.
Xwords_train = tokenizer.texts_to_sequences(X_seq_train)
Xwords_test = tokenizer.texts_to_sequences(X_seq_test)
#Padding the sentence sequences
# Pad/truncate every transcript to exactly 500 tokens.
max_review_length = 500
Xwords_train = pad_sequences(Xwords_train, maxlen=max_review_length)
Xwords_test = pad_sequences(Xwords_test, maxlen=max_review_length)
history = History()

# Binary classifier: frozen doc2vec word embeddings -> LSTM -> sigmoid.
model = Sequential([
    Embedding(embedding_matrix.shape[0],
              output_dim=500,
              weights=[embedding_matrix],
              input_length=500,
              trainable=False),  # keep pre-trained embeddings fixed
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 500, 500) 1294000
lstm (LSTM) (None, 128) 322048
dense (Dense) (None, 1) 129
=================================================================
Total params: 1616177 (6.17 MB)
Trainable params: 322177 (1.23 MB)
Non-trainable params: 1294000 (4.94 MB)
_________________________________________________________________
None
# y_nums_train = tf.keras.utils.to_categorical(y_nums_train)
# y_seq_test = tf.keras.utils.to_categorical(y_seq_test)
import os
import tensorflow as tf

checkpoint_path = r"doc_lstm"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Checkpoint callback: keep only the weights with the lowest validation loss.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Train; History records per-epoch metrics and the checkpoint callback
# snapshots the best weights seen so far.
model.fit(Xwords_train, y_nums_train,
          epochs=45, batch_size=256, verbose=1,
          validation_split=0.1,
          callbacks=[history, cp_callback])
Epoch 1/45 2/2 [==============================] - ETA: 0s - loss: 0.6827 - accuracy: 0.5541 Epoch 1: val_loss improved from inf to 0.71803, saving model to doc_lstm 2/2 [==============================] - 20s 6s/step - loss: 0.6827 - accuracy: 0.5541 - val_loss: 0.7180 - val_accuracy: 0.5200 Epoch 2/45 2/2 [==============================] - ETA: 0s - loss: 0.6641 - accuracy: 0.5698 Epoch 2: val_loss improved from 0.71803 to 0.69013, saving model to doc_lstm 2/2 [==============================] - 14s 6s/step - loss: 0.6641 - accuracy: 0.5698 - val_loss: 0.6901 - val_accuracy: 0.5600 Epoch 3/45 2/2 [==============================] - ETA: 0s - loss: 0.6345 - accuracy: 0.6847 Epoch 3: val_loss improved from 0.69013 to 0.68505, saving model to doc_lstm 2/2 [==============================] - 15s 7s/step - loss: 0.6345 - accuracy: 0.6847 - val_loss: 0.6850 - val_accuracy: 0.5600 Epoch 4/45 2/2 [==============================] - ETA: 0s - loss: 0.6168 - accuracy: 0.6644 Epoch 4: val_loss did not improve from 0.68505 2/2 [==============================] - 14s 7s/step - loss: 0.6168 - accuracy: 0.6644 - val_loss: 0.6887 - val_accuracy: 0.5600 Epoch 5/45 2/2 [==============================] - ETA: 0s - loss: 0.5850 - accuracy: 0.6914 Epoch 5: val_loss improved from 0.68505 to 0.67596, saving model to doc_lstm 2/2 [==============================] - 15s 7s/step - loss: 0.5850 - accuracy: 0.6914 - val_loss: 0.6760 - val_accuracy: 0.5600 Epoch 6/45 2/2 [==============================] - ETA: 0s - loss: 0.5481 - accuracy: 0.7252 Epoch 6: val_loss did not improve from 0.67596 2/2 [==============================] - 14s 7s/step - loss: 0.5481 - accuracy: 0.7252 - val_loss: 0.7665 - val_accuracy: 0.6000 Epoch 7/45 2/2 [==============================] - ETA: 0s - loss: 0.5809 - accuracy: 0.7005 Epoch 7: val_loss improved from 0.67596 to 0.63715, saving model to doc_lstm 2/2 [==============================] - 15s 6s/step - loss: 0.5809 - accuracy: 0.7005 - val_loss: 0.6371 - 
val_accuracy: 0.6800 Epoch 8/45 2/2 [==============================] - ETA: 0s - loss: 0.5093 - accuracy: 0.7568 Epoch 8: val_loss did not improve from 0.63715 2/2 [==============================] - 21s 12s/step - loss: 0.5093 - accuracy: 0.7568 - val_loss: 0.6991 - val_accuracy: 0.6200 Epoch 9/45 2/2 [==============================] - ETA: 0s - loss: 0.5111 - accuracy: 0.7432 Epoch 9: val_loss improved from 0.63715 to 0.61580, saving model to doc_lstm 2/2 [==============================] - 14s 7s/step - loss: 0.5111 - accuracy: 0.7432 - val_loss: 0.6158 - val_accuracy: 0.6800 Epoch 10/45 2/2 [==============================] - ETA: 0s - loss: 0.4978 - accuracy: 0.7523 Epoch 10: val_loss improved from 0.61580 to 0.59728, saving model to doc_lstm 2/2 [==============================] - 14s 6s/step - loss: 0.4978 - accuracy: 0.7523 - val_loss: 0.5973 - val_accuracy: 0.7000 Epoch 11/45 2/2 [==============================] - ETA: 0s - loss: 0.4787 - accuracy: 0.7680 Epoch 11: val_loss improved from 0.59728 to 0.58623, saving model to doc_lstm 2/2 [==============================] - 14s 6s/step - loss: 0.4787 - accuracy: 0.7680 - val_loss: 0.5862 - val_accuracy: 0.7200 Epoch 12/45 2/2 [==============================] - ETA: 0s - loss: 0.4611 - accuracy: 0.7658 Epoch 12: val_loss improved from 0.58623 to 0.54931, saving model to doc_lstm 2/2 [==============================] - 15s 6s/step - loss: 0.4611 - accuracy: 0.7658 - val_loss: 0.5493 - val_accuracy: 0.7800 Epoch 13/45 2/2 [==============================] - ETA: 0s - loss: 0.4575 - accuracy: 0.7658 Epoch 13: val_loss improved from 0.54931 to 0.53644, saving model to doc_lstm 2/2 [==============================] - 15s 7s/step - loss: 0.4575 - accuracy: 0.7658 - val_loss: 0.5364 - val_accuracy: 0.7600 Epoch 14/45 2/2 [==============================] - ETA: 0s - loss: 0.4477 - accuracy: 0.7883 Epoch 14: val_loss did not improve from 0.53644 2/2 [==============================] - 15s 8s/step - loss: 0.4477 - accuracy: 
0.7883 - val_loss: 0.5497 - val_accuracy: 0.7600 Epoch 15/45 2/2 [==============================] - ETA: 0s - loss: 0.4265 - accuracy: 0.7928 Epoch 15: val_loss did not improve from 0.53644 2/2 [==============================] - 14s 7s/step - loss: 0.4265 - accuracy: 0.7928 - val_loss: 0.5498 - val_accuracy: 0.8000 Epoch 16/45 2/2 [==============================] - ETA: 0s - loss: 0.4188 - accuracy: 0.8041 Epoch 16: val_loss did not improve from 0.53644 2/2 [==============================] - 14s 7s/step - loss: 0.4188 - accuracy: 0.8041 - val_loss: 0.5396 - val_accuracy: 0.8000 Epoch 17/45 2/2 [==============================] - ETA: 0s - loss: 0.4065 - accuracy: 0.8198 Epoch 17: val_loss did not improve from 0.53644 2/2 [==============================] - 14s 6s/step - loss: 0.4065 - accuracy: 0.8198 - val_loss: 0.6181 - val_accuracy: 0.7600 Epoch 18/45 2/2 [==============================] - ETA: 0s - loss: 0.4545 - accuracy: 0.7770 Epoch 18: val_loss did not improve from 0.53644 2/2 [==============================] - 14s 6s/step - loss: 0.4545 - accuracy: 0.7770 - val_loss: 0.5503 - val_accuracy: 0.7400 Epoch 19/45 2/2 [==============================] - ETA: 0s - loss: 0.4261 - accuracy: 0.7995 Epoch 19: val_loss did not improve from 0.53644 2/2 [==============================] - 15s 7s/step - loss: 0.4261 - accuracy: 0.7995 - val_loss: 0.5515 - val_accuracy: 0.7600 Epoch 20/45 2/2 [==============================] - ETA: 0s - loss: 0.3976 - accuracy: 0.7995 Epoch 20: val_loss did not improve from 0.53644 2/2 [==============================] - 14s 6s/step - loss: 0.3976 - accuracy: 0.7995 - val_loss: 0.5428 - val_accuracy: 0.8000 Epoch 21/45 2/2 [==============================] - ETA: 0s - loss: 0.3960 - accuracy: 0.8198 Epoch 21: val_loss did not improve from 0.53644 2/2 [==============================] - 14s 7s/step - loss: 0.3960 - accuracy: 0.8198 - val_loss: 0.5406 - val_accuracy: 0.7600 Epoch 22/45 2/2 [==============================] - ETA: 0s - loss: 0.3928 
- accuracy: 0.8153 Epoch 22: val_loss improved from 0.53644 to 0.53429, saving model to doc_lstm 2/2 [==============================] - 14s 7s/step - loss: 0.3928 - accuracy: 0.8153 - val_loss: 0.5343 - val_accuracy: 0.7800 Epoch 23/45 2/2 [==============================] - ETA: 0s - loss: 0.3718 - accuracy: 0.8401 Epoch 23: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.3718 - accuracy: 0.8401 - val_loss: 0.5447 - val_accuracy: 0.7800 Epoch 24/45 2/2 [==============================] - ETA: 0s - loss: 0.3872 - accuracy: 0.8266 Epoch 24: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.3872 - accuracy: 0.8266 - val_loss: 0.5434 - val_accuracy: 0.7800 Epoch 25/45 2/2 [==============================] - ETA: 0s - loss: 0.3789 - accuracy: 0.8288 Epoch 25: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.3789 - accuracy: 0.8288 - val_loss: 0.5448 - val_accuracy: 0.7600 Epoch 26/45 2/2 [==============================] - ETA: 0s - loss: 0.3577 - accuracy: 0.8446 Epoch 26: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.3577 - accuracy: 0.8446 - val_loss: 0.5916 - val_accuracy: 0.7400 Epoch 27/45 2/2 [==============================] - ETA: 0s - loss: 0.3648 - accuracy: 0.8198 Epoch 27: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.3648 - accuracy: 0.8198 - val_loss: 0.5811 - val_accuracy: 0.7600 Epoch 28/45 2/2 [==============================] - ETA: 0s - loss: 0.3664 - accuracy: 0.8423 Epoch 28: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.3664 - accuracy: 0.8423 - val_loss: 0.6044 - val_accuracy: 0.7400 Epoch 29/45 2/2 [==============================] - ETA: 0s - loss: 0.3467 - accuracy: 0.8356 Epoch 29: val_loss did not improve from 0.53429 2/2 
[==============================] - 14s 7s/step - loss: 0.3467 - accuracy: 0.8356 - val_loss: 0.5772 - val_accuracy: 0.7400 Epoch 30/45 2/2 [==============================] - ETA: 0s - loss: 0.3545 - accuracy: 0.8446 Epoch 30: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.3545 - accuracy: 0.8446 - val_loss: 0.5831 - val_accuracy: 0.7400 Epoch 31/45 2/2 [==============================] - ETA: 0s - loss: 0.3426 - accuracy: 0.8491 Epoch 31: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.3426 - accuracy: 0.8491 - val_loss: 0.6149 - val_accuracy: 0.7200 Epoch 32/45 2/2 [==============================] - ETA: 0s - loss: 0.3099 - accuracy: 0.8626 Epoch 32: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.3099 - accuracy: 0.8626 - val_loss: 0.5976 - val_accuracy: 0.7200 Epoch 33/45 2/2 [==============================] - ETA: 0s - loss: 0.2937 - accuracy: 0.8739 Epoch 33: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.2937 - accuracy: 0.8739 - val_loss: 0.6099 - val_accuracy: 0.7800 Epoch 34/45 2/2 [==============================] - ETA: 0s - loss: 0.2782 - accuracy: 0.8604 Epoch 34: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.2782 - accuracy: 0.8604 - val_loss: 0.6462 - val_accuracy: 0.7400 Epoch 35/45 2/2 [==============================] - ETA: 0s - loss: 0.2767 - accuracy: 0.8829 Epoch 35: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.2767 - accuracy: 0.8829 - val_loss: 0.6612 - val_accuracy: 0.7400 Epoch 36/45 2/2 [==============================] - ETA: 0s - loss: 0.2528 - accuracy: 0.9054 Epoch 36: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.2528 - accuracy: 0.9054 - val_loss: 0.6671 - val_accuracy: 0.7600 
Epoch 37/45 2/2 [==============================] - ETA: 0s - loss: 0.2387 - accuracy: 0.9167 Epoch 37: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.2387 - accuracy: 0.9167 - val_loss: 0.7167 - val_accuracy: 0.7200 Epoch 38/45 2/2 [==============================] - ETA: 0s - loss: 0.2165 - accuracy: 0.9234 Epoch 38: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.2165 - accuracy: 0.9234 - val_loss: 0.7511 - val_accuracy: 0.7600 Epoch 39/45 2/2 [==============================] - ETA: 0s - loss: 0.1982 - accuracy: 0.9257 Epoch 39: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.1982 - accuracy: 0.9257 - val_loss: 0.7967 - val_accuracy: 0.8000 Epoch 40/45 2/2 [==============================] - ETA: 0s - loss: 0.2621 - accuracy: 0.8874 Epoch 40: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.2621 - accuracy: 0.8874 - val_loss: 0.7876 - val_accuracy: 0.7200 Epoch 41/45 2/2 [==============================] - ETA: 0s - loss: 0.2705 - accuracy: 0.8761 Epoch 41: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.2705 - accuracy: 0.8761 - val_loss: 0.7688 - val_accuracy: 0.6600 Epoch 42/45 2/2 [==============================] - ETA: 0s - loss: 0.2733 - accuracy: 0.8941 Epoch 42: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.2733 - accuracy: 0.8941 - val_loss: 0.8383 - val_accuracy: 0.6600 Epoch 43/45 2/2 [==============================] - ETA: 0s - loss: 0.2428 - accuracy: 0.8851 Epoch 43: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 6s/step - loss: 0.2428 - accuracy: 0.8851 - val_loss: 0.8889 - val_accuracy: 0.7200 Epoch 44/45 2/2 [==============================] - ETA: 0s - loss: 0.2604 - accuracy: 0.8829 Epoch 44: val_loss did not 
improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.2604 - accuracy: 0.8829 - val_loss: 0.8022 - val_accuracy: 0.6800 Epoch 45/45 2/2 [==============================] - ETA: 0s - loss: 0.2083 - accuracy: 0.9212 Epoch 45: val_loss did not improve from 0.53429 2/2 [==============================] - 14s 7s/step - loss: 0.2083 - accuracy: 0.9212 - val_loss: 0.8531 - val_accuracy: 0.7200
<keras.src.callbacks.History at 0x7a75ede07d60>
# Loads the weights
# Restore the best (lowest val_loss) weights saved by the checkpoint callback,
# rather than the weights from the final training epoch.
model.load_weights(checkpoint_path)
# Re-evaluate the model
loss, acc = model.evaluate(Xwords_test, y_nums_test, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))
2/2 - 1s - loss: 0.3998 - accuracy: 0.7818 - 1s/epoch - 683ms/step Restored model, accuracy: 78.18%
# type(model)
from sklearn.metrics import roc_curve , roc_auc_score

# ROC curve on the held-out test set; 'dementia' is the positive class.
raw_preds = model.predict(Xwords_test)
y_pred_probas = [row[0] for row in raw_preds]
fpr, tpr, thresholds = roc_curve(y_seq_test, y_pred_probas, pos_label='dementia')
# roc_auc = roc_auc_score(y_test, scores)
plt.plot(fpr, tpr)
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
2/2 [==============================] - 1s 217ms/step
#predicting classes on test data
from sklearn.metrics import f1_score

# Sweep candidate decision thresholds and keep the one maximizing F1.
# NOTE(review): the threshold is tuned on the TEST set, which leaks test
# information into the final reported metrics — a validation split would be
# the cleaner choice.
predict_x = model.predict(Xwords_test)
candidate_thresholds = np.arange(0, 1, .01)
score = []
for threshold in candidate_thresholds:  # fixed typo: was 'theshold'
    classes_x = (predict_x >= threshold).astype(int)
    score.append(f1_score(y_nums_test, classes_x))
# Index the threshold that was actually evaluated instead of reconstructing
# it as argmax/100, which silently hard-coded the grid step of 0.01.
final = float(candidate_thresholds[np.argmax(score)])
print(final)
2/2 [==============================] - 0s 153ms/step 0.58
# Final hard predictions at the tuned threshold, plus test-set metrics.
classes_x = (predict_x >= final).astype(int)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
[[21 3]
[ 6 25]]
precision recall f1-score support
0 0.78 0.88 0.82 24
1 0.89 0.81 0.85 31
accuracy 0.84 55
macro avg 0.84 0.84 0.84 55
weighted avg 0.84 0.84 0.84 55
# Training curves: one figure per tracked metric (train vs. validation).
# NOTE(review): the legend labels the validation curve 'test' — labels kept
# as-is to preserve the original figures.
for metric in ('loss', 'accuracy'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
pip install lime
Collecting lime
Downloading lime-0.2.0.1.tar.gz (275 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 275.7/275.7 kB 2.5 MB/s eta 0:00:00
Preparing metadata (setup.py) ... done
Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from lime) (3.7.1)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from lime) (1.25.2)
Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from lime) (1.11.4)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from lime) (4.66.2)
Requirement already satisfied: scikit-learn>=0.18 in /usr/local/lib/python3.10/dist-packages (from lime) (1.2.2)
Requirement already satisfied: scikit-image>=0.12 in /usr/local/lib/python3.10/dist-packages (from lime) (0.19.3)
Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (3.3)
Requirement already satisfied: pillow!=7.1.0,!=7.1.1,!=8.3.0,>=6.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (9.4.0)
Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (2.31.6)
Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (2024.2.12)
Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (1.6.0)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image>=0.12->lime) (24.0)
Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.18->lime) (1.4.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.18->lime) (3.4.0)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (1.2.1)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (4.51.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (1.4.5)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->lime) (2.8.2)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->lime) (1.16.0)
Building wheels for collected packages: lime
Building wheel for lime (setup.py) ... done
Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283835 sha256=fa05d9702f56fa5fce248150041553c402bf6b8318ed72c66e8d1d944535707f
Stored in directory: /root/.cache/pip/wheels/fd/a2/af/9ac0a1a85a27f314a06b39e1f492bee1547d52549a4606ed89
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1
from lime.lime_text import LimeTextExplainer

# LIME explainer over the two output classes (index 0 = control, 1 = dementia).
class_names=['control','dementia']
explainer= LimeTextExplainer(class_names=class_names)

def predict_proba(arr):
    """Return per-class probabilities [P(control), P(dementia)] for raw texts.

    LIME requires an (n_samples, n_classes) array, so the model's single
    sigmoid output p is expanded to [1 - p, p] per sample.
    """
    seqs = pad_sequences(tokenizer.texts_to_sequences(arr), maxlen=max_review_length)
    preds = model.predict(seqs)
    return np.array([[1 - p[0], p[0]] for p in preds])
# print("Actual text:", X_seq_test.iloc[50])
# LIME explanation for test row 50; the true label is printed so the
# explanation can be compared against ground truth.
print("Actual class:", y_seq_test.iloc[50])
explainer.explain_instance(X_seq_test.iloc[50], predict_proba).show_in_notebook(text=True)
Actual class: control 157/157 [==============================] - 45s 289ms/step
# print("Actual text:", X_seq_test.iloc[2])
# LIME explanation for test row 2 (true label printed for reference).
print("Actual class:", y_seq_test.iloc[2])
explainer.explain_instance(X_seq_test.iloc[2], predict_proba).show_in_notebook(text=True)
Actual class: control 157/157 [==============================] - 23s 146ms/step
# print("Actual text:", X_seq_test.iloc[7])
# LIME explanation for test row 7 (true label printed for reference).
print("Actual class:", y_seq_test.iloc[7])
explainer.explain_instance(X_seq_test.iloc[7], predict_proba).show_in_notebook(text=True)
Actual class: dementia 157/157 [==============================] - 26s 166ms/step
# LIME explanation for test row 11, this time also printing the raw transcript.
print("Actual text:", X_seq_test.iloc[11])
print("Actual class:", y_seq_test.iloc[11])
explainer.explain_instance(X_seq_test.iloc[11], predict_proba).show_in_notebook(text=True)
Actual text: now honey i had it was in the kitchen and i was the . and if we made a mess like that you'd get a kick in the ass . well we have uh spilling of the water . and a kid with his cookie jar . and a stool is turned over . and a mother's running the water on the floor . and what else do you want from that . it looks like somebody's laying out in the grass doesn't it . and a kid in the cookie jar . and a tilted stool . what more do you want . the the water rolling on the floor . Actual class: dementia 157/157 [==============================] - 25s 161ms/step
# Same explanation as above for row 7, but as_map() returns the raw
# {class_index: [(token_position, weight), ...]} pairs instead of
# rendering HTML.
print("Actual text:", X_seq_test.iloc[7])
print("Actual class:", y_seq_test.iloc[7])
explainer.explain_instance(X_seq_test.iloc[7], predict_proba).as_map()
Actual text: oh you want me to tell you . the mother and her two children . and the children are getting in the cookie jar . and she's doing the dishes and spilling the water . and she had the spigot on . and she didn't know it perhaps . pardon me . and they're looking out into the garden from the kitchen window . it's open . and the uh cookies must be pretty good they're eating . the tair uh the chair . and uh the lady the mother's splashing her shoes and . and there's um uh a window and curtains on the window . and i can see some trees outside there . and and there's dishes that had been washed . and she's drying them . and there's some shrub out there and . Actual class: dementia
{1: [(40, -0.12552805915391527),
(61, 0.09417966573846853),
(8, 0.09253703905058751),
(54, 0.049728746350582234),
(41, -0.046064330154323194),
(37, -0.037140565690954704),
(57, -0.03637788549330808),
(62, -0.030619669923949544),
(9, -0.02963651122140742),
(42, 0.023940963328369008)]}
# import shap
# # we use the first 100 training examples as our background dataset to integrate over
# explainer = shap.DeepExplainer(model, Xwords_train[:100])
# # explain the first 10 predictions
# # explaining each prediction requires 2 * background dataset size runs
# shap_values = explainer.shap_values(Xwords_test[:10])
history = History()

# BiLSTM variant of the classifier, reusing the same frozen doc2vec embeddings.
model_bilstm = Sequential()
# Derive the embedding width and sequence length from the data instead of
# hard-coding them: the original used output_dim=100 / input_length=100,
# which contradicts the 500-dim embedding_matrix built above (Keras rejects
# `weights` whose shape disagrees with output_dim) and the 500-token padded
# sequences in Xwords_train/Xwords_test.
model_bilstm.add(Embedding(embedding_matrix.shape[0],
                           output_dim=embedding_matrix.shape[1],
                           weights=[embedding_matrix],
                           input_length=max_review_length,
                           trainable=False))
model_bilstm.add(Bidirectional(LSTM(128, recurrent_dropout=0.2)))
model_bilstm.add(Dense(1, activation='sigmoid'))
model_bilstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model_bilstm.summary())
WARNING:tensorflow:From C:\Users\cobus\anaconda3\lib\site-packages\tensorflow\python\ops\init_ops.py:93: calling GlorotUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
WARNING:tensorflow:From C:\Users\cobus\anaconda3\lib\site-packages\tensorflow\python\ops\init_ops.py:93: calling Orthogonal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
WARNING:tensorflow:From C:\Users\cobus\anaconda3\lib\site-packages\tensorflow\python\ops\init_ops.py:93: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_1 (Embedding) (None, 100, 100) 258800
bidirectional (Bidirectiona (None, 256) 234496
l)
dense_1 (Dense) (None, 1) 257
=================================================================
Total params: 493,553
Trainable params: 234,753
Non-trainable params: 258,800
_________________________________________________________________
None
import os
import tensorflow as tf
# Checkpoint path for the BiLSTM's best (lowest val_loss) weights.
checkpoint_path = r"doc_bilstm"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor="val_loss", save_best_only=True,
                                                 save_weights_only=True,
                                                 verbose=1)
# Train the model with the new callback
# Same training regime as the plain LSTM: 45 epochs, 10% validation split,
# History + best-weights checkpointing.
model_bilstm.fit(Xwords_train, y_nums_train, epochs=45, batch_size=256,verbose = 1,callbacks = [history, cp_callback],validation_split=0.1) # Pass callback to training
Train on 444 samples, validate on 50 samples Epoch 1/45 444/444 [==============================] - ETA: 0s - loss: 0.6840 - acc: 0.5676
C:\Users\cobus\AppData\Roaming\Python\Python39\site-packages\keras\engine\training_v1.py:2335: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically. updates = self.state_updates
Epoch 1: val_loss improved from inf to 0.69308, saving model to doc_bilstm 444/444 [==============================] - 4s 10ms/sample - loss: 0.6840 - acc: 0.5676 - val_loss: 0.6931 - val_acc: 0.5200 Epoch 2/45 444/444 [==============================] - ETA: 0s - loss: 0.6640 - acc: 0.5766 Epoch 2: val_loss improved from 0.69308 to 0.68660, saving model to doc_bilstm 444/444 [==============================] - 3s 6ms/sample - loss: 0.6640 - acc: 0.5766 - val_loss: 0.6866 - val_acc: 0.5600 Epoch 3/45 444/444 [==============================] - ETA: 0s - loss: 0.6446 - acc: 0.6306 Epoch 3: val_loss improved from 0.68660 to 0.68046, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.6446 - acc: 0.6306 - val_loss: 0.6805 - val_acc: 0.5800 Epoch 4/45 444/444 [==============================] - ETA: 0s - loss: 0.6245 - acc: 0.6712 Epoch 4: val_loss improved from 0.68046 to 0.67537, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.6245 - acc: 0.6712 - val_loss: 0.6754 - val_acc: 0.5800 Epoch 5/45 444/444 [==============================] - ETA: 0s - loss: 0.6032 - acc: 0.6937 Epoch 5: val_loss improved from 0.67537 to 0.67375, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.6032 - acc: 0.6937 - val_loss: 0.6737 - val_acc: 0.5800 Epoch 6/45 444/444 [==============================] - ETA: 0s - loss: 0.5735 - acc: 0.7140 Epoch 6: val_loss improved from 0.67375 to 0.64137, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.5735 - acc: 0.7140 - val_loss: 0.6414 - val_acc: 0.5600 Epoch 7/45 444/444 [==============================] - ETA: 0s - loss: 0.5537 - acc: 0.7365 Epoch 7: val_loss improved from 0.64137 to 0.63568, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.5537 - acc: 0.7365 - val_loss: 0.6357 - val_acc: 0.6000 Epoch 8/45 444/444 
[==============================] - ETA: 0s - loss: 0.5297 - acc: 0.7500 Epoch 8: val_loss did not improve from 0.63568 444/444 [==============================] - 3s 7ms/sample - loss: 0.5297 - acc: 0.7500 - val_loss: 0.6894 - val_acc: 0.6200 Epoch 9/45 444/444 [==============================] - ETA: 0s - loss: 0.5372 - acc: 0.7275 Epoch 9: val_loss improved from 0.63568 to 0.61377, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.5372 - acc: 0.7275 - val_loss: 0.6138 - val_acc: 0.6600 Epoch 10/45 444/444 [==============================] - ETA: 0s - loss: 0.5175 - acc: 0.7477 Epoch 10: val_loss did not improve from 0.61377 444/444 [==============================] - 3s 7ms/sample - loss: 0.5175 - acc: 0.7477 - val_loss: 0.6162 - val_acc: 0.6400 Epoch 11/45 444/444 [==============================] - ETA: 0s - loss: 0.4947 - acc: 0.7613 Epoch 11: val_loss did not improve from 0.61377 444/444 [==============================] - 3s 7ms/sample - loss: 0.4947 - acc: 0.7613 - val_loss: 0.6526 - val_acc: 0.6600 Epoch 12/45 444/444 [==============================] - ETA: 0s - loss: 0.4870 - acc: 0.7613 Epoch 12: val_loss improved from 0.61377 to 0.61073, saving model to doc_bilstm 444/444 [==============================] - 3s 8ms/sample - loss: 0.4870 - acc: 0.7613 - val_loss: 0.6107 - val_acc: 0.6800 Epoch 13/45 444/444 [==============================] - ETA: 0s - loss: 0.4815 - acc: 0.7703 Epoch 13: val_loss improved from 0.61073 to 0.59304, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.4815 - acc: 0.7703 - val_loss: 0.5930 - val_acc: 0.7000 Epoch 14/45 444/444 [==============================] - ETA: 0s - loss: 0.4641 - acc: 0.7793 Epoch 14: val_loss did not improve from 0.59304 444/444 [==============================] - 3s 7ms/sample - loss: 0.4641 - acc: 0.7793 - val_loss: 0.6088 - val_acc: 0.7000 Epoch 15/45 444/444 [==============================] - ETA: 0s - loss: 0.4597 - acc: 
0.7725 Epoch 15: val_loss improved from 0.59304 to 0.57292, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.4597 - acc: 0.7725 - val_loss: 0.5729 - val_acc: 0.7400 Epoch 16/45 444/444 [==============================] - ETA: 0s - loss: 0.4441 - acc: 0.8086 Epoch 16: val_loss improved from 0.57292 to 0.56109, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.4441 - acc: 0.8086 - val_loss: 0.5611 - val_acc: 0.7200 Epoch 17/45 444/444 [==============================] - ETA: 0s - loss: 0.4467 - acc: 0.8018 Epoch 17: val_loss did not improve from 0.56109 444/444 [==============================] - 3s 7ms/sample - loss: 0.4467 - acc: 0.8018 - val_loss: 0.5820 - val_acc: 0.7000 Epoch 18/45 444/444 [==============================] - ETA: 0s - loss: 0.4416 - acc: 0.7838 Epoch 18: val_loss improved from 0.56109 to 0.55300, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.4416 - acc: 0.7838 - val_loss: 0.5530 - val_acc: 0.7400 Epoch 19/45 444/444 [==============================] - ETA: 0s - loss: 0.4209 - acc: 0.7995 Epoch 19: val_loss did not improve from 0.55300 444/444 [==============================] - 3s 7ms/sample - loss: 0.4209 - acc: 0.7995 - val_loss: 0.5534 - val_acc: 0.7200 Epoch 20/45 444/444 [==============================] - ETA: 0s - loss: 0.4137 - acc: 0.8041 Epoch 20: val_loss did not improve from 0.55300 444/444 [==============================] - 3s 7ms/sample - loss: 0.4137 - acc: 0.8041 - val_loss: 0.5593 - val_acc: 0.7400 Epoch 21/45 444/444 [==============================] - ETA: 0s - loss: 0.4009 - acc: 0.8153 Epoch 21: val_loss improved from 0.55300 to 0.53683, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.4009 - acc: 0.8153 - val_loss: 0.5368 - val_acc: 0.7400 Epoch 22/45 444/444 [==============================] - ETA: 0s - loss: 0.3970 - acc: 0.8176 Epoch 22: val_loss 
improved from 0.53683 to 0.52265, saving model to doc_bilstm 444/444 [==============================] - 3s 8ms/sample - loss: 0.3970 - acc: 0.8176 - val_loss: 0.5226 - val_acc: 0.7400 Epoch 23/45 444/444 [==============================] - ETA: 0s - loss: 0.3700 - acc: 0.8311 Epoch 23: val_loss improved from 0.52265 to 0.51507, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.3700 - acc: 0.8311 - val_loss: 0.5151 - val_acc: 0.7800 Epoch 24/45 444/444 [==============================] - ETA: 0s - loss: 0.3914 - acc: 0.7995 Epoch 24: val_loss improved from 0.51507 to 0.50999, saving model to doc_bilstm 444/444 [==============================] - 3s 7ms/sample - loss: 0.3914 - acc: 0.7995 - val_loss: 0.5100 - val_acc: 0.7400 Epoch 25/45 444/444 [==============================] - ETA: 0s - loss: 0.3735 - acc: 0.8514 Epoch 25: val_loss did not improve from 0.50999 444/444 [==============================] - 3s 7ms/sample - loss: 0.3735 - acc: 0.8514 - val_loss: 0.5360 - val_acc: 0.7600 Epoch 26/45 444/444 [==============================] - ETA: 0s - loss: 0.3645 - acc: 0.8311 Epoch 26: val_loss did not improve from 0.50999 444/444 [==============================] - 3s 7ms/sample - loss: 0.3645 - acc: 0.8311 - val_loss: 0.5371 - val_acc: 0.7600 Epoch 27/45 444/444 [==============================] - ETA: 0s - loss: 0.3548 - acc: 0.8356 Epoch 27: val_loss improved from 0.50999 to 0.50094, saving model to doc_bilstm 444/444 [==============================] - 4s 8ms/sample - loss: 0.3548 - acc: 0.8356 - val_loss: 0.5009 - val_acc: 0.7600 Epoch 28/45 444/444 [==============================] - ETA: 0s - loss: 0.3548 - acc: 0.8311 Epoch 28: val_loss improved from 0.50094 to 0.50021, saving model to doc_bilstm 444/444 [==============================] - 3s 8ms/sample - loss: 0.3548 - acc: 0.8311 - val_loss: 0.5002 - val_acc: 0.7800 Epoch 29/45 444/444 [==============================] - ETA: 0s - loss: 0.3523 - acc: 0.8468 Epoch 29: 
val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.3523 - acc: 0.8468 - val_loss: 0.5147 - val_acc: 0.7400 Epoch 30/45 444/444 [==============================] - ETA: 0s - loss: 0.3211 - acc: 0.8514 Epoch 30: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 8ms/sample - loss: 0.3211 - acc: 0.8514 - val_loss: 0.5417 - val_acc: 0.7600 Epoch 31/45 444/444 [==============================] - ETA: 0s - loss: 0.3338 - acc: 0.8378 Epoch 31: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.3338 - acc: 0.8378 - val_loss: 0.5126 - val_acc: 0.7600 Epoch 32/45 444/444 [==============================] - ETA: 0s - loss: 0.3114 - acc: 0.8806 Epoch 32: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.3114 - acc: 0.8806 - val_loss: 0.5241 - val_acc: 0.7400 Epoch 33/45 444/444 [==============================] - ETA: 0s - loss: 0.3206 - acc: 0.8761 Epoch 33: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.3206 - acc: 0.8761 - val_loss: 0.5693 - val_acc: 0.7800 Epoch 34/45 444/444 [==============================] - ETA: 0s - loss: 0.3146 - acc: 0.8604 Epoch 34: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.3146 - acc: 0.8604 - val_loss: 0.5775 - val_acc: 0.6800 Epoch 35/45 444/444 [==============================] - ETA: 0s - loss: 0.3312 - acc: 0.8649 Epoch 35: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.3312 - acc: 0.8649 - val_loss: 0.6024 - val_acc: 0.6000 Epoch 36/45 444/444 [==============================] - ETA: 0s - loss: 0.3147 - acc: 0.8604 Epoch 36: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.3147 - acc: 0.8604 - val_loss: 0.6386 - val_acc: 
0.7200 Epoch 37/45 444/444 [==============================] - ETA: 0s - loss: 0.2799 - acc: 0.8851 Epoch 37: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.2799 - acc: 0.8851 - val_loss: 0.5681 - val_acc: 0.8000 Epoch 38/45 444/444 [==============================] - ETA: 0s - loss: 0.2766 - acc: 0.9009 Epoch 38: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.2766 - acc: 0.9009 - val_loss: 0.5914 - val_acc: 0.7800 Epoch 39/45 444/444 [==============================] - ETA: 0s - loss: 0.2787 - acc: 0.8919 Epoch 39: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 7ms/sample - loss: 0.2787 - acc: 0.8919 - val_loss: 0.5533 - val_acc: 0.7600 Epoch 40/45 444/444 [==============================] - ETA: 0s - loss: 0.2474 - acc: 0.9077 Epoch 40: val_loss did not improve from 0.50021 444/444 [==============================] - 4s 8ms/sample - loss: 0.2474 - acc: 0.9077 - val_loss: 0.5678 - val_acc: 0.7200 Epoch 41/45 444/444 [==============================] - ETA: 0s - loss: 0.2417 - acc: 0.9122 Epoch 41: val_loss did not improve from 0.50021 444/444 [==============================] - 3s 8ms/sample - loss: 0.2417 - acc: 0.9122 - val_loss: 0.6432 - val_acc: 0.7400 Epoch 42/45 444/444 [==============================] - ETA: 0s - loss: 0.2349 - acc: 0.9032 Epoch 42: val_loss did not improve from 0.50021 444/444 [==============================] - 4s 10ms/sample - loss: 0.2349 - acc: 0.9032 - val_loss: 0.5926 - val_acc: 0.7200 Epoch 43/45 444/444 [==============================] - ETA: 0s - loss: 0.2458 - acc: 0.9077 Epoch 43: val_loss did not improve from 0.50021 444/444 [==============================] - 4s 10ms/sample - loss: 0.2458 - acc: 0.9077 - val_loss: 0.6053 - val_acc: 0.7600 Epoch 44/45 444/444 [==============================] - ETA: 0s - loss: 0.2301 - acc: 0.9054 Epoch 44: val_loss did not improve from 0.50021 444/444 
[==============================] - 5s 10ms/sample - loss: 0.2301 - acc: 0.9054 - val_loss: 0.6479 - val_acc: 0.7600 Epoch 45/45 444/444 [==============================] - ETA: 0s - loss: 0.2300 - acc: 0.9167 Epoch 45: val_loss did not improve from 0.50021 444/444 [==============================] - 4s 9ms/sample - loss: 0.2300 - acc: 0.9167 - val_loss: 0.6391 - val_acc: 0.6600
<keras.callbacks.History at 0x16de24a0bb0>
# Restore the best-epoch weights written by the checkpoint callback,
# then measure accuracy on the held-out test set.
model_bilstm.load_weights(checkpoint_path)
loss, acc = model_bilstm.evaluate(Xwords_test, y_nums_test, verbose=2)
print(f"Restored model, accuracy: {100 * acc:5.2f}%")
2/2 - 0s - loss: 0.3335 - accuracy: 0.8909 - 128ms/epoch - 64ms/step Restored model, accuracy: 89.09%
from sklearn.metrics import roc_curve, roc_auc_score

# Sigmoid output per test transcript = P(dementia); flatten the (n, 1) column.
y_pred_probas = [row[0] for row in model_bilstm.predict(Xwords_test)]

# ROC: sweep the decision threshold and trace TPR against FPR.
fpr, tpr, thresholds = roc_curve(y_seq_test, y_pred_probas, pos_label='dementia')
plt.plot(fpr, tpr)
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
C:\Users\cobus\AppData\Roaming\Python\Python39\site-packages\keras\engine\training_v1.py:2359: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically. updates=self.state_updates,
#predicting classes on test data
from sklearn.metrics import f1_score

# Sweep candidate decision thresholds (0.00-0.99, step 0.01) and record the
# F1 score each achieves against the binary test labels.
predict_x = model_bilstm.predict(Xwords_test)
score = []
for threshold in np.arange(0, 1, .01):
    score.append(f1_score(y_nums_test, (predict_x >= threshold).astype(int)))

# Index of the best-scoring threshold (converted to a probability below).
final = np.argmax(score)
print(final)
36
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report

# Convert the argmax index back to a probability threshold (sweep step was
# 0.01) and report confusion matrix / per-class metrics at that cut-off.
final = final / 100
classes_x = (predict_x >= final).astype(int)
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
[[17 7]
[ 5 26]]
precision recall f1-score support
0 0.77 0.71 0.74 24
1 0.79 0.84 0.81 31
accuracy 0.78 55
macro avg 0.78 0.77 0.78 55
weighted avg 0.78 0.78 0.78 55
# Training curves for the Bi-LSTM run.
# NOTE(review): the fit output above logs the metric as 'acc'/'val_acc'
# (TF1-compat naming), while metrics=['accuracy'] yields 'accuracy' keys on
# newer Keras — indexing 'accuracy' unconditionally raises KeyError in this
# session. Resolve whichever key is actually present so the plots work
# under either naming.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
val_acc_key = 'val_accuracy' if 'val_accuracy' in history.history else 'val_acc'

plt.plot(history.history[acc_key])
plt.plot(history.history[val_acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
from lime.lime_text import LimeTextExplainer

# LIME needs a callable mapping raw texts to per-class probabilities.
class_names = ['control', 'dementia']
explainer = LimeTextExplainer(class_names=class_names)

def predict_proba(arr):
    """Tokenise raw texts and return [P(control), P(dementia)] per text."""
    seqs = pad_sequences(tokenizer.texts_to_sequences(arr), maxlen=max_review_length)
    preds = model_bilstm.predict(seqs)
    # Model emits a single sigmoid p = P(dementia); complement is P(control).
    return np.array([[1 - p[0], p[0]] for p in preds])
# Explain test transcript 50 and render the word-level attributions inline.
print("Actual class:", y_seq_test.iloc[50])
explanation = explainer.explain_instance(X_seq_test.iloc[50], predict_proba)
explanation.show_in_notebook(text=True)
Actual class: control
# Explain test transcript 2 and render the word-level attributions inline.
print("Actual class:", y_seq_test.iloc[2])
explanation = explainer.explain_instance(X_seq_test.iloc[2], predict_proba)
explanation.show_in_notebook(text=True)
Actual class: control
# Explain test transcript 7 and render the word-level attributions inline.
print("Actual class:", y_seq_test.iloc[7])
explanation = explainer.explain_instance(X_seq_test.iloc[7], predict_proba)
explanation.show_in_notebook(text=True)
Actual class: dementia
# Explain test transcript 11 and render the word-level attributions inline.
print("Actual class:", y_seq_test.iloc[11])
explanation = explainer.explain_instance(X_seq_test.iloc[11], predict_proba)
explanation.show_in_notebook(text=True)
Actual class: dementia
history = History()

def create_conv_model():
    """Build a CNN front-end + LSTM classifier over the frozen embeddings."""
    layers = [
        Embedding(input_dim=embedding_matrix.shape[0], output_dim=100,
                  weights=[embedding_matrix], input_length=100, trainable=False),
        Dropout(0.1),
        Conv1D(32, 5, activation='relu'),
        MaxPooling1D(pool_size=4),
        LSTM(64),
        Dense(1, activation='sigmoid'),
    ]
    model_conv = Sequential(layers)
    model_conv.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model_conv
import os
import tensorflow as tf

# Checkpoint file holding the best CNN+LSTM weights (lowest val_loss).
checkpoint_path = r"doc_cnnlstm"
checkpoint_dir = os.path.dirname(checkpoint_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

model_conv1 = create_conv_model()
# Training was run in an earlier session; the saved weights are restored below.
# model_conv1.fit(Xwords_train, y_nums_train, epochs=45, batch_size=256,verbose = 1,callbacks = [history, cp_callback],validation_split=0.1) # Pass callback to training

model_conv1.load_weights(checkpoint_path)
loss, acc = model_conv1.evaluate(Xwords_test, y_nums_test, verbose=2)
print(f"Restored model, accuracy: {100 * acc:5.2f}%")
2/2 - 0s - loss: 0.3351 - accuracy: 0.8545 - 23ms/epoch - 11ms/step Restored model, accuracy: 85.45%
from sklearn.metrics import roc_curve, roc_auc_score

# P(dementia) per test transcript from the CNN+LSTM; flatten the (n, 1) column.
y_pred_probas = [row[0] for row in model_conv1.predict(Xwords_test)]

# ROC for the restored CNN+LSTM model.
fpr, tpr, thresholds = roc_curve(y_seq_test, y_pred_probas, pos_label='dementia')
plt.plot(fpr, tpr)
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
C:\Users\cobus\AppData\Roaming\Python\Python39\site-packages\keras\engine\training_v1.py:2359: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically. updates=self.state_updates,
#predicting classes on test data
from sklearn.metrics import f1_score

# Same 0.00-0.99 threshold sweep as for the Bi-LSTM, now on the CNN+LSTM
# predictions: record the F1 score at each candidate cut-off.
predict_x = model_conv1.predict(Xwords_test)
score = []
for threshold in np.arange(0, 1, .01):
    score.append(f1_score(y_nums_test, (predict_x >= threshold).astype(int)))

final = np.argmax(score)
print(final)
80
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report

# Argmax index -> probability threshold (sweep step 0.01), then report the
# confusion matrix and per-class metrics at the chosen cut-off.
final = final / 100
classes_x = (predict_x >= final).astype(int)
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
[[20 4]
[ 3 28]]
precision recall f1-score support
0 0.87 0.83 0.85 24
1 0.88 0.90 0.89 31
accuracy 0.87 55
macro avg 0.87 0.87 0.87 55
weighted avg 0.87 0.87 0.87 55
# Training curves for the CNN+LSTM run (loss first, then accuracy).
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# NOTE(review): this session's fit output logs the metric as 'acc'/'val_acc'
# (TF1-compat naming), so indexing 'accuracy' unconditionally raises
# KeyError. Resolve whichever key exists so the plot works under either
# Keras naming convention.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
val_acc_key = 'val_accuracy' if 'val_accuracy' in history.history else 'val_acc'

plt.plot(history.history[acc_key])
plt.plot(history.history[val_acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
import numpy as np
import pandas as pd
import os
from tqdm import tqdm, trange
from sklearn.metrics import roc_auc_score
import pickle
import gc
from transformers import BertModel, BertTokenizer
# Configuration for the BERT-embedding experiment.
BERT_MODEL = 'bert-base-uncased'
# NOTE(review): INPUT, TEXT_COL and MAXLEN are assigned here but not
# referenced again in this section (pad_sequences below uses the literal
# 250) — presumably leftovers; confirm before removing.
INPUT = r"C:\Users\cobus\Desktop\Thesis\talk_bank_small.csv"
TEXT_COL = talk_bank_small['text']
MAXLEN = 250
# Stratified 90/10 train/test split of transcripts and their labels.
X_seq_train, X_seq_test, y_seq_train, y_seq_test = train_test_split(talk_bank_small['text'], talk_bank_small['label'], test_size = 0.1, random_state = 100, stratify = talk_bank_small['label'])
sentences = X_seq_train  # alias; tokenize() below is called with the splits directly
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL)
def tokenize(sentences):
    """Encode each raw transcript with the BERT wordpiece tokenizer.

    Parameters
    ----------
    sentences : iterable of str
        Raw transcripts to encode.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array of token ids per transcript, with '[CLS]' and '[SEP]'
        special tokens included.

    Notes
    -----
    * ``padding='longest'`` is a per-call no-op here because each sentence
      is encoded individually; the ragged outputs are padded to a fixed
      length afterwards with ``pad_sequences``.
    * No truncation is applied, so transcripts longer than BERT's 512-token
      limit trigger the tokenizer warning seen below; they are cut down
      later by ``pad_sequences(maxlen=250)``.
    * The original version also accumulated attention masks and an unused
      ``tensor`` local; both were dead code (never returned or read) and
      have been removed — the returned value is unchanged.
    """
    input_ids = []
    for sent in sentences:
        encoded_dict = tokenizer.encode_plus(
            sent,                      # sentence to split into tokens
            add_special_tokens=True,   # add '[CLS]' and '[SEP]'
            padding='longest',
            return_attention_mask=True,
            return_tensors='pt',       # return PyTorch tensors
        )
        # Flatten the (1, seq_len) tensor into a plain 1-D numpy array.
        input_ids.append(encoded_dict['input_ids'].reshape((-1,)).detach().numpy())
    return input_ids
# Encode train and test transcripts into lists of 1-D token-id arrays.
input_ids = tokenize(X_seq_train)
test_ids = tokenize(X_seq_test)
Token indices sequence length is longer than the specified maximum sequence length for this model (530 > 512). Running this sequence through the model will result in indexing errors
# Pad/truncate every sequence to exactly 250 ids (Keras pad_sequences
# defaults to 'pre' padding and 'pre' truncation, so long transcripts keep
# their last 250 tokens).
input_ids = pad_sequences(input_ids, maxlen=250)
test_ids = pad_sequences(test_ids, maxlen=250)
# Base BertModel (no task head); the "some weights ... were not used"
# warning below is expected when loading a pre-training checkpoint this way.
model = BertModel.from_pretrained(BERT_MODEL)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight'] - This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
# Sanity check: push the first padded sequence through BERT's input
# embedding layer — (250,) int ids become (250, 768) float vectors.
input_embeddings = model.get_input_embeddings()
import torch
print(input_embeddings(torch.tensor(input_ids[0])).shape)
print(input_ids[0].shape)
torch.Size([250, 768]) (250,)
# dir(input_embeddings)
# Extract BERT's wordpiece embedding weight matrix from the torch module so
# it can seed a frozen Keras Embedding layer; shape is (vocab_size, 768).
parameters = next(input_embeddings.parameters())
embedding_matrix = parameters.detach().numpy()
embedding_matrix.shape
(30522, 768)
# Binary targets: 1 = 'dementia', 0 = 'control'.
y_nums_train = y_seq_train.eq('dementia').astype(int)
y_nums_test = y_seq_test.eq('dementia').astype(int)
history = History()

# LSTM classifier over frozen BERT wordpiece embeddings.
model = Sequential([
    Embedding(input_dim=tokenizer.vocab_size, output_dim=768,
              weights=[embedding_matrix], input_length=250, trainable=False),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
Model: "sequential_10"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_10 (Embedding) (None, 250, 768) 23440896
lstm_10 (LSTM) (None, 128) 459264
dense_10 (Dense) (None, 1) 129
=================================================================
Total params: 23,900,289
Trainable params: 459,393
Non-trainable params: 23,440,896
_________________________________________________________________
None
# Reuse the generic train/test variable names for the BERT-token inputs.
Xwords_train = input_ids
Xwords_test = test_ids

import os
import tensorflow as tf

# Best-epoch (lowest val_loss) weights for the BERT-embedding LSTM.
checkpoint_path = r"bert_lstm"
checkpoint_dir = os.path.dirname(checkpoint_path)

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Train; `history` captures per-epoch metrics, cp_callback keeps the best weights.
model.fit(Xwords_train, y_nums_train,
          epochs=45, batch_size=256, verbose=1,
          callbacks=[history, cp_callback],
          validation_split=0.1)
Train on 444 samples, validate on 50 samples Epoch 1/45 444/444 [==============================] - ETA: 0s - loss: 0.6902 - acc: 0.5023
C:\Users\cobus\AppData\Roaming\Python\Python39\site-packages\keras\engine\training_v1.py:2335: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically. updates = self.state_updates
Epoch 1: val_loss improved from inf to 0.69713, saving model to bert_lstm 444/444 [==============================] - 12s 28ms/sample - loss: 0.6902 - acc: 0.5023 - val_loss: 0.6971 - val_acc: 0.5200 Epoch 2/45 444/444 [==============================] - ETA: 0s - loss: 0.6844 - acc: 0.5608 Epoch 2: val_loss did not improve from 0.69713 444/444 [==============================] - 11s 26ms/sample - loss: 0.6844 - acc: 0.5608 - val_loss: 0.7028 - val_acc: 0.5200 Epoch 3/45 444/444 [==============================] - ETA: 0s - loss: 0.6835 - acc: 0.5608 Epoch 3: val_loss improved from 0.69713 to 0.69282, saving model to bert_lstm 444/444 [==============================] - 14s 31ms/sample - loss: 0.6835 - acc: 0.5608 - val_loss: 0.6928 - val_acc: 0.5200 Epoch 4/45 444/444 [==============================] - ETA: 0s - loss: 0.6781 - acc: 0.5608 Epoch 4: val_loss improved from 0.69282 to 0.68731, saving model to bert_lstm 444/444 [==============================] - 13s 29ms/sample - loss: 0.6781 - acc: 0.5608 - val_loss: 0.6873 - val_acc: 0.5200 Epoch 5/45 444/444 [==============================] - ETA: 0s - loss: 0.6761 - acc: 0.5631 Epoch 5: val_loss improved from 0.68731 to 0.68479, saving model to bert_lstm 444/444 [==============================] - 13s 30ms/sample - loss: 0.6761 - acc: 0.5631 - val_loss: 0.6848 - val_acc: 0.5200 Epoch 6/45 444/444 [==============================] - ETA: 0s - loss: 0.6737 - acc: 0.5743 Epoch 6: val_loss improved from 0.68479 to 0.68388, saving model to bert_lstm 444/444 [==============================] - 15s 34ms/sample - loss: 0.6737 - acc: 0.5743 - val_loss: 0.6839 - val_acc: 0.5200 Epoch 7/45 444/444 [==============================] - ETA: 0s - loss: 0.6687 - acc: 0.5653 Epoch 7: val_loss did not improve from 0.68388 444/444 [==============================] - 14s 31ms/sample - loss: 0.6687 - acc: 0.5653 - val_loss: 0.6841 - val_acc: 0.5200 Epoch 8/45 444/444 [==============================] - ETA: 0s - loss: 0.6610 - acc: 0.5743 Epoch 
8: val_loss did not improve from 0.68388 444/444 [==============================] - 14s 30ms/sample - loss: 0.6610 - acc: 0.5743 - val_loss: 0.6846 - val_acc: 0.5200 Epoch 9/45 444/444 [==============================] - ETA: 0s - loss: 0.6562 - acc: 0.5856 Epoch 9: val_loss improved from 0.68388 to 0.67070, saving model to bert_lstm 444/444 [==============================] - 14s 32ms/sample - loss: 0.6562 - acc: 0.5856 - val_loss: 0.6707 - val_acc: 0.6800 Epoch 10/45 444/444 [==============================] - ETA: 0s - loss: 0.6410 - acc: 0.6374 Epoch 10: val_loss improved from 0.67070 to 0.65984, saving model to bert_lstm 444/444 [==============================] - 15s 34ms/sample - loss: 0.6410 - acc: 0.6374 - val_loss: 0.6598 - val_acc: 0.6200 Epoch 11/45 444/444 [==============================] - ETA: 0s - loss: 0.6367 - acc: 0.6599 Epoch 11: val_loss improved from 0.65984 to 0.65345, saving model to bert_lstm 444/444 [==============================] - 15s 33ms/sample - loss: 0.6367 - acc: 0.6599 - val_loss: 0.6535 - val_acc: 0.6200 Epoch 12/45 444/444 [==============================] - ETA: 0s - loss: 0.6387 - acc: 0.6486 Epoch 12: val_loss improved from 0.65345 to 0.65062, saving model to bert_lstm 444/444 [==============================] - 15s 34ms/sample - loss: 0.6387 - acc: 0.6486 - val_loss: 0.6506 - val_acc: 0.6800 Epoch 13/45 444/444 [==============================] - ETA: 0s - loss: 0.6197 - acc: 0.6892 Epoch 13: val_loss did not improve from 0.65062 444/444 [==============================] - 15s 33ms/sample - loss: 0.6197 - acc: 0.6892 - val_loss: 0.6617 - val_acc: 0.6800 Epoch 14/45 444/444 [==============================] - ETA: 0s - loss: 0.6186 - acc: 0.6329 Epoch 14: val_loss did not improve from 0.65062 444/444 [==============================] - 17s 38ms/sample - loss: 0.6186 - acc: 0.6329 - val_loss: 0.6733 - val_acc: 0.6200 Epoch 15/45 444/444 [==============================] - ETA: 0s - loss: 0.6115 - acc: 0.6554 Epoch 15: val_loss did not 
improve from 0.65062 444/444 [==============================] - 14s 31ms/sample - loss: 0.6115 - acc: 0.6554 - val_loss: 0.6585 - val_acc: 0.6600 Epoch 16/45 444/444 [==============================] - ETA: 0s - loss: 0.6090 - acc: 0.6532 Epoch 16: val_loss did not improve from 0.65062 444/444 [==============================] - 13s 30ms/sample - loss: 0.6090 - acc: 0.6532 - val_loss: 0.6641 - val_acc: 0.6400 Epoch 17/45 444/444 [==============================] - ETA: 0s - loss: 0.5990 - acc: 0.6847 Epoch 17: val_loss improved from 0.65062 to 0.64718, saving model to bert_lstm 444/444 [==============================] - 15s 35ms/sample - loss: 0.5990 - acc: 0.6847 - val_loss: 0.6472 - val_acc: 0.6600 Epoch 18/45 444/444 [==============================] - ETA: 0s - loss: 0.5775 - acc: 0.6959 Epoch 18: val_loss did not improve from 0.64718 444/444 [==============================] - 15s 33ms/sample - loss: 0.5775 - acc: 0.6959 - val_loss: 0.6474 - val_acc: 0.6400 Epoch 19/45 444/444 [==============================] - ETA: 0s - loss: 0.5577 - acc: 0.7207 Epoch 19: val_loss improved from 0.64718 to 0.60939, saving model to bert_lstm 444/444 [==============================] - 19s 42ms/sample - loss: 0.5577 - acc: 0.7207 - val_loss: 0.6094 - val_acc: 0.6800 Epoch 20/45 444/444 [==============================] - ETA: 0s - loss: 0.5512 - acc: 0.7342 Epoch 20: val_loss improved from 0.60939 to 0.58464, saving model to bert_lstm 444/444 [==============================] - 22s 50ms/sample - loss: 0.5512 - acc: 0.7342 - val_loss: 0.5846 - val_acc: 0.6800 Epoch 21/45 444/444 [==============================] - ETA: 0s - loss: 0.5640 - acc: 0.7162 Epoch 21: val_loss did not improve from 0.58464 444/444 [==============================] - 20s 44ms/sample - loss: 0.5640 - acc: 0.7162 - val_loss: 0.6822 - val_acc: 0.6000 Epoch 22/45 444/444 [==============================] - ETA: 0s - loss: 0.5633 - acc: 0.6847 Epoch 22: val_loss did not improve from 0.58464 444/444 
[==============================] - 21s 47ms/sample - loss: 0.5633 - acc: 0.6847 - val_loss: 0.7210 - val_acc: 0.6000 Epoch 23/45 444/444 [==============================] - ETA: 0s - loss: 0.5668 - acc: 0.6937 Epoch 23: val_loss did not improve from 0.58464 444/444 [==============================] - 22s 50ms/sample - loss: 0.5668 - acc: 0.6937 - val_loss: 0.7100 - val_acc: 0.6400 Epoch 24/45 444/444 [==============================] - ETA: 0s - loss: 0.5636 - acc: 0.7320 Epoch 24: val_loss did not improve from 0.58464 444/444 [==============================] - 21s 48ms/sample - loss: 0.5636 - acc: 0.7320 - val_loss: 0.6845 - val_acc: 0.6000 Epoch 25/45 444/444 [==============================] - ETA: 0s - loss: 0.5601 - acc: 0.7320 Epoch 25: val_loss did not improve from 0.58464 444/444 [==============================] - 18s 41ms/sample - loss: 0.5601 - acc: 0.7320 - val_loss: 0.6466 - val_acc: 0.6800 Epoch 26/45 444/444 [==============================] - ETA: 0s - loss: 0.5308 - acc: 0.7477 Epoch 26: val_loss did not improve from 0.58464 444/444 [==============================] - 24s 55ms/sample - loss: 0.5308 - acc: 0.7477 - val_loss: 0.6162 - val_acc: 0.7200 Epoch 27/45 444/444 [==============================] - ETA: 0s - loss: 0.5270 - acc: 0.7365 Epoch 27: val_loss did not improve from 0.58464 444/444 [==============================] - 22s 50ms/sample - loss: 0.5270 - acc: 0.7365 - val_loss: 0.6150 - val_acc: 0.6800 Epoch 28/45 444/444 [==============================] - ETA: 0s - loss: 0.5280 - acc: 0.7320 Epoch 28: val_loss did not improve from 0.58464 444/444 [==============================] - 24s 54ms/sample - loss: 0.5280 - acc: 0.7320 - val_loss: 0.6271 - val_acc: 0.6200 Epoch 29/45 444/444 [==============================] - ETA: 0s - loss: 0.5459 - acc: 0.7117 Epoch 29: val_loss did not improve from 0.58464 444/444 [==============================] - 16s 37ms/sample - loss: 0.5459 - acc: 0.7117 - val_loss: 0.6020 - val_acc: 0.6400 Epoch 30/45 444/444 
[==============================] - ETA: 0s - loss: 0.5283 - acc: 0.7432 Epoch 30: val_loss did not improve from 0.58464 444/444 [==============================] - 16s 37ms/sample - loss: 0.5283 - acc: 0.7432 - val_loss: 0.6345 - val_acc: 0.6600 Epoch 31/45 444/444 [==============================] - ETA: 0s - loss: 0.5090 - acc: 0.7500 Epoch 31: val_loss did not improve from 0.58464 444/444 [==============================] - 17s 38ms/sample - loss: 0.5090 - acc: 0.7500 - val_loss: 0.6610 - val_acc: 0.6800 Epoch 32/45 444/444 [==============================] - ETA: 0s - loss: 0.5156 - acc: 0.7838 Epoch 32: val_loss did not improve from 0.58464 444/444 [==============================] - 17s 39ms/sample - loss: 0.5156 - acc: 0.7838 - val_loss: 0.6842 - val_acc: 0.6000 Epoch 33/45 444/444 [==============================] - ETA: 0s - loss: 0.5176 - acc: 0.7748 Epoch 33: val_loss did not improve from 0.58464 444/444 [==============================] - 17s 38ms/sample - loss: 0.5176 - acc: 0.7748 - val_loss: 0.6914 - val_acc: 0.6400 Epoch 34/45 444/444 [==============================] - ETA: 0s - loss: 0.5059 - acc: 0.7568 Epoch 34: val_loss did not improve from 0.58464 444/444 [==============================] - 17s 37ms/sample - loss: 0.5059 - acc: 0.7568 - val_loss: 0.6820 - val_acc: 0.6200 Epoch 35/45 444/444 [==============================] - ETA: 0s - loss: 0.4775 - acc: 0.7838 Epoch 35: val_loss did not improve from 0.58464 444/444 [==============================] - 18s 40ms/sample - loss: 0.4775 - acc: 0.7838 - val_loss: 0.6472 - val_acc: 0.6800 Epoch 36/45 444/444 [==============================] - ETA: 0s - loss: 0.4917 - acc: 0.7725 Epoch 36: val_loss did not improve from 0.58464 444/444 [==============================] - 19s 42ms/sample - loss: 0.4917 - acc: 0.7725 - val_loss: 0.6032 - val_acc: 0.7400 Epoch 37/45 444/444 [==============================] - ETA: 0s - loss: 0.4683 - acc: 0.7680 Epoch 37: val_loss did not improve from 0.58464 444/444 
[==============================] - 22s 50ms/sample - loss: 0.4683 - acc: 0.7680 - val_loss: 0.5898 - val_acc: 0.7200 Epoch 38/45 444/444 [==============================] - ETA: 0s - loss: 0.4488 - acc: 0.7950 Epoch 38: val_loss did not improve from 0.58464 444/444 [==============================] - 20s 46ms/sample - loss: 0.4488 - acc: 0.7950 - val_loss: 0.6075 - val_acc: 0.7000 Epoch 39/45 444/444 [==============================] - ETA: 0s - loss: 0.4539 - acc: 0.7815 Epoch 39: val_loss did not improve from 0.58464 444/444 [==============================] - 24s 54ms/sample - loss: 0.4539 - acc: 0.7815 - val_loss: 0.6566 - val_acc: 0.6600 Epoch 40/45 444/444 [==============================] - ETA: 0s - loss: 0.4532 - acc: 0.7973 Epoch 40: val_loss did not improve from 0.58464 444/444 [==============================] - 20s 44ms/sample - loss: 0.4532 - acc: 0.7973 - val_loss: 0.6546 - val_acc: 0.6800 Epoch 41/45 444/444 [==============================] - ETA: 0s - loss: 0.4481 - acc: 0.8063 Epoch 41: val_loss did not improve from 0.58464 444/444 [==============================] - 23s 52ms/sample - loss: 0.4481 - acc: 0.8063 - val_loss: 0.6267 - val_acc: 0.7000 Epoch 42/45 444/444 [==============================] - ETA: 0s - loss: 0.4200 - acc: 0.8041 Epoch 42: val_loss did not improve from 0.58464 444/444 [==============================] - 27s 61ms/sample - loss: 0.4200 - acc: 0.8041 - val_loss: 0.5940 - val_acc: 0.7200 Epoch 43/45 444/444 [==============================] - ETA: 0s - loss: 0.4056 - acc: 0.8018 Epoch 43: val_loss did not improve from 0.58464 444/444 [==============================] - 25s 56ms/sample - loss: 0.4056 - acc: 0.8018 - val_loss: 0.6029 - val_acc: 0.7600 Epoch 44/45 444/444 [==============================] - ETA: 0s - loss: 0.4686 - acc: 0.7860 Epoch 44: val_loss did not improve from 0.58464 444/444 [==============================] - 21s 48ms/sample - loss: 0.4686 - acc: 0.7860 - val_loss: 0.6744 - val_acc: 0.7000 Epoch 45/45 444/444 
[==============================] - ETA: 0s - loss: 0.4140 - acc: 0.8221 Epoch 45: val_loss did not improve from 0.58464 444/444 [==============================] - 20s 45ms/sample - loss: 0.4140 - acc: 0.8221 - val_loss: 0.6702 - val_acc: 0.6800
<keras.callbacks.History at 0x16dfc2cef10>
# Restore the best checkpointed weights and re-check held-out performance.
model.load_weights(checkpoint_path)
loss, acc = model.evaluate(Xwords_test, y_nums_test, verbose=2)
print(f"Restored model, accuracy: {100 * acc:5.2f}%")
2/2 - 0s - loss: 0.5640 - accuracy: 0.7091 - 155ms/epoch - 78ms/step Restored model, accuracy: 70.91%
from sklearn.metrics import roc_curve, roc_auc_score

# Score the test set and plot the ROC curve ('dementia' = positive class).
y_pred_probas = [row[0] for row in model.predict(Xwords_test)]
fpr, tpr, thresholds = roc_curve(y_seq_test, y_pred_probas, pos_label='dementia')
plt.plot(fpr, tpr)
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
C:\Users\cobus\AppData\Roaming\Python\Python39\site-packages\keras\engine\training_v1.py:2359: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically. updates=self.state_updates,
# Predict probabilities on the test data, then sweep decision thresholds in
# steps of 0.01 and keep the one that maximises F1 against the test labels.
from sklearn.metrics import f1_score

predict_x = model.predict(Xwords_test).ravel()  # flatten (n, 1) -> (n,)
score = []
for threshold in np.arange(0, 1, .01):  # fixed typo: was "theshold"
    classes_x = (predict_x >= threshold).astype(int)
    score.append(f1_score(y_nums_test, classes_x))
# argmax index i corresponds to threshold i/100 because the step is 0.01
final = np.argmax(score)
final = final / 100
print(final)
0.32
# Apply the chosen threshold and report the confusion matrix and per-class metrics.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report

classes_x = (predict_x >= final).astype(int)
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
[[12 12]
[ 3 28]]
precision recall f1-score support
0 0.80 0.50 0.62 24
1 0.70 0.90 0.79 31
accuracy 0.73 55
macro avg 0.75 0.70 0.70 55
weighted avg 0.74 0.73 0.71 55
# Learning curves: loss then accuracy, training vs. validation split.
for metric in ('loss', 'accuracy'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
# Bidirectional LSTM over the frozen pre-trained embedding matrix.
history = History()
model_bilstm = Sequential([
    Embedding(input_dim=tokenizer.vocab_size, output_dim=768,
              weights=[embedding_matrix], input_length=250, trainable=False),
    Bidirectional(LSTM(128, recurrent_dropout=0.2)),
    Dense(1, activation='sigmoid'),
])
model_bilstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model_bilstm.summary())
Model: "sequential_9"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_9 (Embedding) (None, 250, 768) 23440896
bidirectional_1 (Bidirectio (None, 256) 918528
nal)
dense_9 (Dense) (None, 1) 257
=================================================================
Total params: 24,359,681
Trainable params: 918,785
Non-trainable params: 23,440,896
_________________________________________________________________
None
import os
import tensorflow as tf
# Checkpoint prefix for the BiLSTM run; best weights are saved under this name.
checkpoint_path = r"bert_bilstm"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
# (weights only, and only when val_loss improves).
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
monitor="val_loss",
save_best_only=True,
save_weights_only=True,
verbose=1)
# Train the model with the new callback
# 45 epochs, batch 256, last 10% of the training data held out for validation.
model_bilstm.fit(Xwords_train, y_nums_train, epochs=45, batch_size=256,verbose = 1,callbacks = [history, cp_callback],validation_split=0.1) # Pass callback to training
# Reload the best (lowest val_loss) weights and evaluate on the test set.
model_bilstm.load_weights(checkpoint_path)
loss, acc = model_bilstm.evaluate(Xwords_test, y_nums_test, verbose=2)
print(f"Restored model, accuracy: {100 * acc:5.2f}%")
2/2 - 0s - loss: 0.6059 - accuracy: 0.6909 - 292ms/epoch - 146ms/step Restored model, accuracy: 69.09%
from sklearn.metrics import roc_curve, roc_auc_score

# ROC curve for the BiLSTM on the test set ('dementia' = positive class).
y_pred_probas = [row[0] for row in model_bilstm.predict(Xwords_test)]
fpr, tpr, thresholds = roc_curve(y_seq_test, y_pred_probas, pos_label='dementia')
plt.plot(fpr, tpr)
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
2/2 [==============================] - 1s 149ms/step
# Predict probabilities with the BiLSTM and sweep thresholds (step 0.01) for
# the best F1.  NOTE: this prints the argmax *index*, not the threshold; the
# next cell converts it by dividing by 100.
from sklearn.metrics import f1_score

predict_x = model_bilstm.predict(Xwords_test).ravel()  # (n, 1) -> (n,)
score = []
for threshold in np.arange(0, 1, .01):  # fixed typo: was "theshold"
    classes_x = (predict_x >= threshold).astype(int)
    score.append(f1_score(y_nums_test, classes_x))
final = np.argmax(score)
print(final)
2/2 [==============================] - 0s 144ms/step 19
# Convert the argmax index into a probability threshold, then report metrics.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report

final = final / 100
classes_x = (predict_x >= final).astype(int)
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
[[ 6 18]
[ 1 30]]
precision recall f1-score support
0 0.86 0.25 0.39 24
1 0.62 0.97 0.76 31
accuracy 0.65 55
macro avg 0.74 0.61 0.57 55
weighted avg 0.73 0.65 0.60 55
# Learning curves for the BiLSTM: loss then accuracy, train vs. validation.
for metric in ('loss', 'accuracy'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
history = History()

def create_conv_model():
    """Build a Conv1D + LSTM classifier over the frozen pre-trained embeddings."""
    stack = [
        Embedding(input_dim=tokenizer.vocab_size, output_dim=768,
                  weights=[embedding_matrix], input_length=250, trainable=False),
        Dropout(0.1),
        Conv1D(32, 5, activation='relu'),
        MaxPooling1D(pool_size=4),
        LSTM(64),
        Dense(1, activation='sigmoid'),
    ]
    model_conv = Sequential(stack)
    model_conv.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model_conv
import os
import tensorflow as tf
# Checkpoint prefix for the CNN-LSTM run.
checkpoint_path = r"bert_cnnlstm"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
# (weights only, and only when val_loss improves).
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor="val_loss", save_best_only=True,
save_weights_only=True,
verbose=1)
# Train the model with the new callback
# Fresh model instance; same regime as the other runs (45 epochs, 10% validation).
model_conv = create_conv_model()
model_conv.fit(Xwords_train, y_nums_train, epochs=45, batch_size=256,verbose = 1,callbacks = [history, cp_callback],validation_split=0.1) # Pass callback to training
# Restore the best CNN-LSTM checkpoint and evaluate on the test set.
model_conv.load_weights(checkpoint_path)
loss, acc = model_conv.evaluate(Xwords_test, y_nums_test, verbose=2)
print(f"Restored model, accuracy: {100 * acc:5.2f}%")
2/2 - 0s - loss: 0.4162 - accuracy: 0.8727 - 30ms/epoch - 15ms/step Restored model, accuracy: 87.27%
from sklearn.metrics import roc_curve, roc_auc_score

# ROC curve for the CNN-LSTM on the test set ('dementia' = positive class).
y_pred_probas = [row[0] for row in model_conv.predict(Xwords_test)]
fpr, tpr, thresholds = roc_curve(y_seq_test, y_pred_probas, pos_label='dementia')
plt.plot(fpr, tpr)
plt.title("ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()
2/2 [==============================] - 0s 6ms/step
# Predict probabilities with the CNN-LSTM and sweep thresholds (step 0.01) for
# the best F1.  NOTE: prints the argmax *index*; the next cell divides by 100.
from sklearn.metrics import f1_score

predict_x = model_conv.predict(Xwords_test).ravel()  # (n, 1) -> (n,)
score = []
for threshold in np.arange(0, 1, .01):  # fixed typo: was "theshold"
    classes_x = (predict_x >= threshold).astype(int)
    score.append(f1_score(y_nums_test, classes_x))
final = np.argmax(score)
print(final)
2/2 [==============================] - 0s 6ms/step 41
# Convert the argmax index into a probability threshold, then report metrics.
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report

final = final / 100
classes_x = (predict_x >= final).astype(int)
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
[[21 3]
[ 4 27]]
precision recall f1-score support
0 0.84 0.88 0.86 24
1 0.90 0.87 0.89 31
accuracy 0.87 55
macro avg 0.87 0.87 0.87 55
weighted avg 0.87 0.87 0.87 55
# Learning curves for the CNN-LSTM: loss then accuracy, train vs. validation.
for metric in ('loss', 'accuracy'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
Doc2Vec LSTM (doc2vec embeddings + LSTM classifier)
# Rebuild the doc2vec-based LSTM architecture so the saved weights can be loaded.
model = Sequential()
# Frozen doc2vec embedding matrix; sequences are padded/truncated to length 100.
model.add(Embedding(embedding_matrix.shape[0], output_dim = 100, weights = [embedding_matrix], input_length=100, trainable = False))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
# Loads the weights
checkpoint_path = 'doc_lstm'
model.load_weights(checkpoint_path)
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7d5a786f2590>
# NOTE(review): compiled with the legacy Adam optimizer and run_eagerly=True —
# presumably for the gradient-based attribution (IntegratedGradients) below; confirm.
model.compile(tf.keras.optimizers.legacy.Adam(), run_eagerly=True)
# model = Sequential()
# model.add(Embedding(embedding_matrix.shape[0], output_dim = 100, weights = [embedding_matrix], input_length=100, trainable = False))
# model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
# model.add(Dense(1, activation='sigmoid'))
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# print(model.summary())
# NOTE(review): absolute local (Windows) path to the same 'doc_lstm' checkpoint —
# only valid on the author's machine; parameterize before reuse.
model.load_weights(r'C:\Users\cobus\Desktop\Thesis\Best Models\doc_lstm')
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x16e69a69ac0>
!pip install shap
Requirement already satisfied: shap in /usr/local/lib/python3.10/dist-packages (0.45.0) Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from shap) (1.25.2) Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from shap) (1.11.4) Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from shap) (1.2.2) Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from shap) (2.0.3) Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.10/dist-packages (from shap) (4.66.2) Requirement already satisfied: packaging>20.9 in /usr/local/lib/python3.10/dist-packages (from shap) (24.0) Requirement already satisfied: slicer==0.0.7 in /usr/local/lib/python3.10/dist-packages (from shap) (0.0.7) Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from shap) (0.58.1) Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from shap) (2.2.1) Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->shap) (0.41.1) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->shap) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->shap) (2023.4) Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->shap) (2024.1) Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->shap) (1.4.0) Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->shap) (3.4.0) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->shap) (1.16.0)
# Xwords_test[:10]
tf.__version__
'2.14.1'
# from tensorflow.python.framework.ops import enable_eager_execution
# enable_eager_execution()
tf.executing_eagerly()
True
# model.compile(run_eagerly=True)
Xwords_test.shape
(55, 100)
import shap
# Use the first 75 training examples as the background dataset to integrate over
# (the original comment said 100, but the slice below is [:75]).
explainer = shap.Explainer(model, Xwords_train[:75])
# explain the first 10 predictions
# explaining each prediction requires 2 * background dataset size runs
shap_values = explainer.shap_values(Xwords_test[:10])
PermutationExplainer explainer: 11it [07:51, 47.20s/it]
# init the JS visualization code
shap.initjs()

# Map token ids back to words so SHAP can label each sequence position.
words = tokenizer.word_index
num2word = {idx: word for word, idx in words.items()}
x_test_words = np.stack(
    [
        np.array([num2word.get(tok, "NONE") for tok in Xwords_test[i]])
        for i in range(10)
    ]
)
# plot the explanation of the first prediction
# Note the model is "multi-output" because it is rank-2 but only has one column
shap.force_plot(explainer.expected_value[0], shap_values[0][0], x_test_words[0])
# Explain a single test instance with SHAP and render a force plot.
row = 22
print("actual class is", y_seq_test.iloc[row])  # was hard-coded iloc[22]; now uses `row`
# X_words_test = pd.DataFrame(X_test, columns=vectorizer.get_feature_names_out())
# NOTE(review): the explainer was built on Xwords_* token arrays; verify that
# X_seq_test rows are the expected input representation here.
choosen_instance = X_seq_test.iloc[row]
shap_values = explainer.shap_values(choosen_instance)
shap.initjs()
shap.plots.force(explainer.expected_value[0], shap_values[:, :], choosen_instance, matplotlib=True)
!pip install alibi[tensorflow]
Collecting alibi[tensorflow]
Downloading alibi-0.9.5-py3-none-any.whl (522 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 522.1/522.1 kB 3.2 MB/s eta 0:00:00
Requirement already satisfied: numpy<2.0.0,>=1.16.2 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (1.25.2)
Requirement already satisfied: pandas<3.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (2.0.3)
Requirement already satisfied: scikit-learn<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (1.2.2)
Requirement already satisfied: spacy[lookups]<4.0.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (3.7.4)
Requirement already satisfied: blis<0.8.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (0.7.11)
Requirement already satisfied: scikit-image<0.23,>=0.17.2 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (0.19.3)
Requirement already satisfied: requests<3.0.0,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (2.31.0)
Requirement already satisfied: Pillow<11.0,>=5.4.1 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (9.4.0)
Requirement already satisfied: attrs<24.0.0,>=19.2.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (23.2.0)
Requirement already satisfied: scipy<2.0.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (1.11.4)
Requirement already satisfied: matplotlib<4.0.0,>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (3.7.1)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (4.11.0)
Collecting dill<0.4.0,>=0.3.0 (from alibi[tensorflow])
Downloading dill-0.3.8-py3-none-any.whl (116 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 116.3/116.3 kB 12.1 MB/s eta 0:00:00
Requirement already satisfied: transformers<5.0.0,>=4.7.0 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (4.38.2)
Requirement already satisfied: tqdm<5.0.0,>=4.28.1 in /usr/local/lib/python3.10/dist-packages (from alibi[tensorflow]) (4.66.2)
Collecting pydantic<2.0.0 (from alibi[tensorflow])
Downloading pydantic-1.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 27.0 MB/s eta 0:00:00
Collecting tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0 (from alibi[tensorflow])
Downloading tensorflow-2.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (489.9 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 489.9/489.9 MB 1.1 MB/s eta 0:00:00
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<4.0.0,>=3.0.0->alibi[tensorflow]) (1.2.1)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib<4.0.0,>=3.0.0->alibi[tensorflow]) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<4.0.0,>=3.0.0->alibi[tensorflow]) (4.51.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<4.0.0,>=3.0.0->alibi[tensorflow]) (1.4.5)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<4.0.0,>=3.0.0->alibi[tensorflow]) (24.0)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<4.0.0,>=3.0.0->alibi[tensorflow]) (3.1.2)
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib<4.0.0,>=3.0.0->alibi[tensorflow]) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0.0,>=1.0.0->alibi[tensorflow]) (2023.4)
Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0.0,>=1.0.0->alibi[tensorflow]) (2024.1)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.21.0->alibi[tensorflow]) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.21.0->alibi[tensorflow]) (3.6)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.21.0->alibi[tensorflow]) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.21.0->alibi[tensorflow]) (2024.2.2)
Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image<0.23,>=0.17.2->alibi[tensorflow]) (3.3)
Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image<0.23,>=0.17.2->alibi[tensorflow]) (2.31.6)
Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image<0.23,>=0.17.2->alibi[tensorflow]) (2024.2.12)
Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image<0.23,>=0.17.2->alibi[tensorflow]) (1.6.0)
Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn<2.0.0,>=1.0.0->alibi[tensorflow]) (1.4.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn<2.0.0,>=1.0.0->alibi[tensorflow]) (3.4.0)
Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (3.0.12)
Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (1.0.5)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (1.0.10)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (2.0.8)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (3.0.9)
Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (8.2.3)
Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (1.1.2)
Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (2.4.8)
Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (2.0.10)
Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (0.3.4)
Requirement already satisfied: typer<0.10.0,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (0.9.4)
Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (6.4.0)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (3.1.3)
Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (67.7.2)
Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (3.3.0)
Collecting spacy-lookups-data<1.1.0,>=1.0.3 (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow])
Downloading spacy_lookups_data-1.0.5-py2.py3-none-any.whl (98.5 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.5/98.5 MB 1.5 MB/s eta 0:00:00
Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (1.4.0)
Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (1.6.3)
Requirement already satisfied: flatbuffers>=23.5.26 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (24.3.25)
Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.5.4)
Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.2.0)
Requirement already satisfied: h5py>=2.9.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (3.9.0)
Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (18.1.1)
Requirement already satisfied: ml-dtypes==0.2.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.2.0)
Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (3.3.0)
Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (3.20.3)
Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (1.16.0)
Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (2.4.0)
Requirement already satisfied: wrapt<1.15,>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (1.14.1)
Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.36.0)
Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (1.62.1)
Collecting tensorboard<2.15,>=2.14 (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow])
Downloading tensorboard-2.14.1-py3-none-any.whl (5.5 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.5/5.5 MB 1.5 MB/s eta 0:00:00
Collecting tensorflow-estimator<2.15,>=2.14.0 (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow])
Downloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl (440 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 440.7/440.7 kB 1.7 MB/s eta 0:00:00
Collecting keras<2.15,>=2.14.0 (from tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow])
Downloading keras-2.14.0-py3-none-any.whl (1.7 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.7/1.7 MB 1.9 MB/s eta 0:00:00
Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.7.0->alibi[tensorflow]) (3.13.4)
Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.7.0->alibi[tensorflow]) (0.20.3)
Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.7.0->alibi[tensorflow]) (6.0.1)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.7.0->alibi[tensorflow]) (2023.12.25)
Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.7.0->alibi[tensorflow]) (0.15.2)
Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.7.0->alibi[tensorflow]) (0.4.2)
Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.43.0)
Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers<5.0.0,>=4.7.0->alibi[tensorflow]) (2023.6.0)
Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (2.27.0)
Collecting google-auth-oauthlib<1.1,>=0.5 (from tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow])
Downloading google_auth_oauthlib-1.0.0-py2.py3-none-any.whl (18 kB)
Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (3.6)
Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.7.2)
Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (3.0.2)
Requirement already satisfied: confection<1.0.0,>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from thinc<8.3.0,>=8.2.2->spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (0.1.4)
Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.10/dist-packages (from typer<0.10.0,>=0.3.0->spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (8.1.7)
Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from weasel<0.4.0,>=0.1.0->spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (0.16.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]) (2.1.5)
Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (5.3.3)
Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.4.0)
Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (4.9)
Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (1.3.1)
Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (0.6.0)
Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.15,>=2.14->tensorflow!=2.6.0,!=2.6.1,<2.15.0,>=2.0.0->alibi[tensorflow]) (3.2.2)
Installing collected packages: tensorflow-estimator, spacy-lookups-data, pydantic, keras, dill, google-auth-oauthlib, tensorboard, tensorflow, alibi
Attempting uninstall: tensorflow-estimator
Found existing installation: tensorflow-estimator 2.15.0
Uninstalling tensorflow-estimator-2.15.0:
Successfully uninstalled tensorflow-estimator-2.15.0
Attempting uninstall: pydantic
Found existing installation: pydantic 2.6.4
Uninstalling pydantic-2.6.4:
Successfully uninstalled pydantic-2.6.4
Attempting uninstall: keras
Found existing installation: keras 2.15.0
Uninstalling keras-2.15.0:
Successfully uninstalled keras-2.15.0
Attempting uninstall: google-auth-oauthlib
Found existing installation: google-auth-oauthlib 1.2.0
Uninstalling google-auth-oauthlib-1.2.0:
Successfully uninstalled google-auth-oauthlib-1.2.0
Attempting uninstall: tensorboard
Found existing installation: tensorboard 2.15.2
Uninstalling tensorboard-2.15.2:
Successfully uninstalled tensorboard-2.15.2
Attempting uninstall: tensorflow
Found existing installation: tensorflow 2.15.0
Uninstalling tensorflow-2.15.0:
Successfully uninstalled tensorflow-2.15.0
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tf-keras 2.15.1 requires tensorflow<2.16,>=2.15, but you have tensorflow 2.14.1 which is incompatible.
Successfully installed alibi-0.9.5 dill-0.3.8 google-auth-oauthlib-1.0.0 keras-2.14.0 pydantic-1.10.15 spacy-lookups-data-1.0.5 tensorboard-2.14.1 tensorflow-2.14.1 tensorflow-estimator-2.14.0
1/1 [==============================] - 2s 2s/step
# model.layers[0]
from alibi.explainers import IntegratedGradients
# Integrated-gradients settings: 20 integration steps with Gauss-Legendre
# quadrature, evaluated in internal batches of 100.
n_steps = 20
method = "gausslegendre"
internal_batch_size = 100
# Attribute with respect to the embedding layer (layers[0]) since the raw
# inputs are discrete token ids, which gradients cannot flow through.
ig = IntegratedGradients(model,
layer=model.layers[0],
n_steps=n_steps,
method=method,
internal_batch_size=internal_batch_size)
# Round sigmoid outputs to hard 0/1 predictions and explain each test example
# against its own predicted class.
preds = np.around(model.predict(Xwords_test)).astype(int).reshape(-1)
explanation = ig.explain(Xwords_test, target=preds)
# explanation = ig.explain(Xwords_test, target=y_nums_test)
len(explanation.attributions)
1
# If 4 dimensions, (1, # of observations, sequence length, embedding size)
# If 3 dimensions, (1, sequence length, embedding size)
attrs = np.array(explanation.attributions)
# Collapse the embedding axis so each token gets one scalar attribution.
attrs = attrs.sum(axis=3)
print('Attributions shape:', attrs.shape)
from IPython.display import HTML
def hlstr(string, color='white'):
    """Wrap *string* (plus a trailing space) in an HTML <mark> tag
    whose background is *color*."""
    opening = f"<mark style=background-color:{color}>"
    return opening + string + " </mark>"
def colorize(attrs, cmap='PiYG'):
    """
    Compute hex colors based on the attributions for a single instance.

    Uses a diverging colorscale by default and normalizes and scales
    the colormap so that colors are consistent with the attributions
    (symmetric around zero, so sign maps to hue).
    """
    import matplotlib as mpl
    cmap_bound = np.abs(attrs).max()
    norm = mpl.colors.Normalize(vmin=-cmap_bound, vmax=cmap_bound)
    # mpl.cm.get_cmap was deprecated in Matplotlib 3.7 (it triggers the
    # MatplotlibDeprecationWarning seen in this notebook's output); use the
    # colormaps registry where available, falling back for Matplotlib < 3.6.
    try:
        cmap = mpl.colormaps[cmap]
    except AttributeError:
        cmap = mpl.cm.get_cmap(cmap)
    # now compute hex values of colors
    colors = list(map(lambda x: mpl.colors.rgb2hex(cmap(norm(x))), attrs))
    return colors
row_num = 9 # 9, 50, 2, 7, 11, 13, 33
words = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0].split()
colors = colorize(attrs[0, row_num])
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
print('Actual label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
print('Predicted label = {}: {}'.format(preds[row_num], ('control', 'dementia')[preds[row_num]]))
Actual label = 1: dementia Predicted label = 1: dementia
HTML("".join(list(map(hlstr, words, colors))))
row_num = 50 # 9, 50, 2, 7, 11, 13, 33
words = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0].split()
colors = colorize(attrs[0, row_num])
print('Actual label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
print('Predicted label = {}: {}'.format(preds[row_num], ('control', 'dementia')[preds[row_num]]))
display(HTML(f'whole text: {X_seq_test.iloc[row_num]}'))
HTML("".join(list(map(hlstr, words, colors))))
Actual label = 0: control Predicted label = 0: control
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
len(X_seq_test.iloc[row_num].split())
351
row_num = 2 # 9, 50, 2, 7, 11, 13, 33
words = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0].split()
colors = colorize(attrs[0, row_num])
print('Actual label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
print('Predicted label = {}: {}'.format(preds[row_num], ('control', 'dementia')[preds[row_num]]))
display(HTML(f'whole text: {X_seq_test.iloc[row_num]}'))
HTML("".join(list(map(hlstr, words, colors))))
Actual label = 0: control Predicted label = 0: control
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
row_num = 7 # 9, 50, 2, 7, 11, 13, 33
words = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0].split()
colors = colorize(attrs[0, row_num])
print('Actual label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
print('Predicted label = {}: {}'.format(preds[row_num], ('control', 'dementia')[preds[row_num]]))
display(HTML(f'whole text: {X_seq_test.iloc[row_num]}'))
HTML("".join(list(map(hlstr, words, colors))))
Actual label = 1: dementia Predicted label = 1: dementia
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
row_num = 11 # 9, 50, 2, 7, 11, 13, 33
words = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0].split()
colors = colorize(attrs[0, row_num])
print('Actual label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
print('Predicted label = {}: {}'.format(preds[row_num], ('control', 'dementia')[preds[row_num]]))
display(HTML(f'whole text: {X_seq_test.iloc[row_num]}'))
HTML("".join(list(map(hlstr, words, colors))))
Actual label = 1: dementia Predicted label = 1: dementia
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
row_num = 13 # 9, 50, 2, 7, 11, 13, 33
words = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0].split()
colors = colorize(attrs[0, row_num])
print('Actual label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
print('Predicted label = {}: {}'.format(preds[row_num], ('control', 'dementia')[preds[row_num]]))
display(HTML(f'whole text: {X_seq_test.iloc[row_num]}'))
HTML("".join(list(map(hlstr, words, colors))))
Actual label = 1: dementia Predicted label = 0: control
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
row_num = 33 # 9, 50, 2, 7, 11, 13, 33
words = tokenizer.sequences_to_texts(Xwords_test[[row_num]])[0].split()
colors = colorize(attrs[0, row_num])
print('Actual label = {}: {}'.format(y_nums_test.iloc[row_num], y_seq_test.iloc[row_num]))
print('Predicted label = {}: {}'.format(preds[row_num], ('control', 'dementia')[preds[row_num]]))
display(HTML(f'whole text: {X_seq_test.iloc[row_num]}'))
HTML("".join(list(map(hlstr, words, colors))))
Actual label = 1: dementia Predicted label = 1: dementia
<ipython-input-99-6faf338aa0bf>:18: MatplotlibDeprecationWarning: The get_cmap function was deprecated in Matplotlib 3.7 and will be removed two minor releases later. Use ``matplotlib.colormaps[name]`` or ``matplotlib.colormaps.get_cmap(obj)`` instead. cmap = mpl.cm.get_cmap(cmap)
# NOTE(review): leftover SHAP experiment. `X_validate` and
# `list_of_your_columns_here` are not defined anywhere visible in this
# notebook, so the summary_plot call below fails as written — confirm
# before reuse. Shapes (494, 100) / (55, 100) are hard-coded to the
# train/test set sizes.
GE = shap.GradientExplainer(model_conv, Xwords_train.reshape((494, 100))) # X_train is 3d numpy.ndarray
shap_values = GE.shap_values(Xwords_test.reshape((55, 100))) # X_validate is 3d numpy.ndarray
import shap
# wrap = Sequential()
# for layer in model.layers:
#     wrap.add_layer(layer)
GE = shap.GradientExplainer(model_conv, Xwords_train.reshape((494, 100))) # X_train is 3d numpy.ndarray
shap_values = GE.shap_values(Xwords_test.reshape((55, 100))) # X_validate is 3d numpy.ndarray
shap.initjs()
shap.summary_plot(
    shap_values[0],
    X_validate,
    feature_names=list_of_your_columns_here,
    max_display=50,
    plot_type='bar')
# Run SHAP's GradientExplainer against the TF1-compat graph: v2 behaviour
# and eager execution are disabled first (presumably required by this
# shap/TensorFlow combination — TODO confirm).
from tensorflow.compat.v1.keras.backend import get_session
tf.compat.v1.disable_v2_behavior()
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
# Use the training data for deep explainer => can use fewer instances
explainer = shap.GradientExplainer(model, Xwords_train)
# explain the testing instances (can use fewer instances)
# explaining each prediction requires 2 * background dataset size runs
shap_values = explainer.shap_values(Xwords_test)
# init the JS visualization code
shap.initjs()
# NOTE(review): `features` is not defined anywhere visible — confirm.
shap.force_plot(explainer.expected_value[0], shap_values[0][0], features)
# Smaller SHAP run: integrate over a 100-example background set and
# explain only the first 10 test predictions.
import shap
# we use the first 100 training examples as our background dataset to integrate over
explainer = shap.GradientExplainer(model, Xwords_train[:100])
# explain the first 10 predictions
# explaining each prediction requires 2 * background dataset size runs
shap_values = explainer.shap_values(Xwords_test[:10])
# Word-level LSTM classifier: frozen pretrained embeddings (100-dim
# vectors, input length 100) -> LSTM(128) with input/recurrent dropout
# -> single sigmoid unit (probability of the positive class).
model = Sequential()
model.add(Embedding(embedding_matrix.shape[0], output_dim = 100, weights = [embedding_matrix], input_length=100, trainable = False))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
# checkpoint_path = r"doc_lstm"
# model.load_weights(checkpoint_path)
# model.save_weights('Best Models/doc_lstm')
model.load_weights(r'C:\Users\cobus\Desktop\Thesis\Best Models\Best Models\doc_lstm')
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x16df5ff1fd0>
#predicting classes on test data
from sklearn.metrics import f1_score
predict_x=model.predict(Xwords_test)
# Sweep decision thresholds 0.00..0.99 in 0.01 steps, recording the F1
# score at each.  (NOTE: 'theshold' is a typo for 'threshold'.)
score = []
for theshold in np.arange(0,1,.01):
    classes_x= (predict_x >= theshold).astype(int)
    ideal_score = f1_score(y_nums_test, classes_x)
    score.append(ideal_score)
# argmax gives the best threshold's *index*; dividing by 100 recovers the
# threshold value — this is coupled to the 0.01 step size above.
final = np.argmax(score)
final = final/100
print(final)
classes_x = (predict_x >= final).astype(int)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
0.38
[[21 3]
[ 6 25]]
precision recall f1-score support
0 0.78 0.88 0.82 24
1 0.89 0.81 0.85 31
accuracy 0.84 55
macro avg 0.84 0.84 0.84 55
weighted avg 0.84 0.84 0.84 55
from lime.lime_text import LimeTextExplainer

class_names=['control','dementia']
explainer= LimeTextExplainer(class_names=class_names)

def predict_proba(arr):
    """Return an (n, 2) array of [P(control), P(dementia)] for raw texts.

    Texts are tokenized and padded exactly as at training time, then fed
    to the LSTM model, whose single sigmoid output is P(dementia).
    """
    token_lists = tokenizer.texts_to_sequences(arr)
    seqs = pad_sequences(token_lists, maxlen=max_review_length)
    positives = model.predict(seqs)
    # Pair each positive-class probability with its complement.
    return np.array([np.array([1 - p[0], p[0]]) for p in positives])
# print("Actual text:", X_seq_test.iloc[50])
print("Actual class:", y_seq_test.iloc[50])
explainer.explain_instance(X_seq_test.iloc[50], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[2])
explainer.explain_instance(X_seq_test.iloc[2], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[7])
explainer.explain_instance(X_seq_test.iloc[7], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[11])
explainer.explain_instance(X_seq_test.iloc[11], predict_proba).show_in_notebook(text=True)
Actual class: dementia
model_bilstm.load_weights(r'C:\Users\cobus\Desktop\Thesis\Best Models\Best Models\doc_bilstm')
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x16df619b6a0>
#predicting classes on test data
from sklearn.metrics import f1_score
predict_x=model_bilstm.predict(Xwords_test)
# Sweep decision thresholds 0.00..0.99 in 0.01 steps, recording the F1
# score at each.  (NOTE: 'theshold' is a typo for 'threshold'.)
score = []
for theshold in np.arange(0,1,.01):
    classes_x= (predict_x >= theshold).astype(int)
    ideal_score = f1_score(y_nums_test, classes_x)
    score.append(ideal_score)
# argmax gives the best threshold's *index*; dividing by 100 recovers the
# threshold value — this is coupled to the 0.01 step size above.
final = np.argmax(score)
final = final/100
print(final)
classes_x = (predict_x >= final).astype(int)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
0.5
[[24 0]
[ 6 25]]
precision recall f1-score support
0 0.80 1.00 0.89 24
1 1.00 0.81 0.89 31
accuracy 0.89 55
macro avg 0.90 0.90 0.89 55
weighted avg 0.91 0.89 0.89 55
from lime.lime_text import LimeTextExplainer

class_names=['control','dementia']
explainer= LimeTextExplainer(class_names=class_names)

def predict_proba(arr):
    """Return an (n, 2) array of [P(control), P(dementia)] for raw texts.

    Same preprocessing as training, routed through the BiLSTM model whose
    single sigmoid output is P(dementia).
    """
    token_lists = tokenizer.texts_to_sequences(arr)
    seqs = pad_sequences(token_lists, maxlen=max_review_length)
    positives = model_bilstm.predict(seqs)
    # Pair each positive-class probability with its complement.
    return np.array([np.array([1 - p[0], p[0]]) for p in positives])
print("Actual class:", y_seq_test.iloc[50])
explainer.explain_instance(X_seq_test.iloc[50], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[2])
explainer.explain_instance(X_seq_test.iloc[2], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[7])
explainer.explain_instance(X_seq_test.iloc[7], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[11])
explainer.explain_instance(X_seq_test.iloc[11], predict_proba).show_in_notebook(text=True)
Actual class: dementia
model_conv1.load_weights(r'C:\Users\cobus\Desktop\Thesis\Best Models\Best Models\doc_cnnlstm')
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x16df632d7c0>
#predicting classes on test data
from sklearn.metrics import f1_score
predict_x=model_conv1.predict(Xwords_test)
# Sweep decision thresholds 0.00..0.99 in 0.01 steps, recording the F1
# score at each.  (NOTE: 'theshold' is a typo for 'threshold'.)
score = []
for theshold in np.arange(0,1,.01):
    classes_x= (predict_x >= theshold).astype(int)
    ideal_score = f1_score(y_nums_test, classes_x)
    score.append(ideal_score)
# argmax gives the best threshold's *index*; dividing by 100 recovers the
# threshold value — this is coupled to the 0.01 step size above.
final = np.argmax(score)
final = final/100
print(final)
classes_x = (predict_x >= final).astype(int)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
C:\Users\cobus\AppData\Roaming\Python\Python39\site-packages\keras\engine\training_v1.py:2359: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically. updates=self.state_updates,
0.27
[[18 6]
[ 1 30]]
precision recall f1-score support
0 0.95 0.75 0.84 24
1 0.83 0.97 0.90 31
accuracy 0.87 55
macro avg 0.89 0.86 0.87 55
weighted avg 0.88 0.87 0.87 55
from lime.lime_text import LimeTextExplainer

class_names=['control','dementia']
explainer= LimeTextExplainer(class_names=class_names)

def predict_proba(arr):
    """Return an (n, 2) array of [P(control), P(dementia)] for raw texts.

    Same preprocessing as training, routed through the CNN-LSTM model
    whose single sigmoid output is P(dementia).
    """
    token_lists = tokenizer.texts_to_sequences(arr)
    seqs = pad_sequences(token_lists, maxlen=max_review_length)
    positives = model_conv1.predict(seqs)
    # Pair each positive-class probability with its complement.
    return np.array([np.array([1 - p[0], p[0]]) for p in positives])
print("Actual class:", y_seq_test.iloc[50])
explainer.explain_instance(X_seq_test.iloc[50], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[2])
explainer.explain_instance(X_seq_test.iloc[2], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[7])
explainer.explain_instance(X_seq_test.iloc[7], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[11])
explainer.explain_instance(X_seq_test.iloc[11], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[33])
explainer.explain_instance(X_seq_test.iloc[33], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[13])
explainer.explain_instance(X_seq_test.iloc[13], predict_proba).show_in_notebook(text=True)
Actual class: dementia
model_conv.load_weights(r'C:\Users\cobus\Desktop\Thesis\Best Models\bert_cnnlstm')
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x16d86eec190>
pip show transformers
Note: you may need to restart the kernel to use updated packages.
WARNING: Ignoring invalid distribution -umpy (c:\users\cobus\anaconda3\lib\site-packages)
Name: transformers Version: 4.28.1 Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow Home-page: https://github.com/huggingface/transformers Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors) Author-email: transformers@huggingface.co License: Apache 2.0 License Location: c:\users\cobus\anaconda3\lib\site-packages Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, tokenizers, tqdm Required-by: sentence-transformers
#predicting classes on test data
from sklearn.metrics import f1_score
predict_x=model_conv.predict(Xwords_test)
# Sweep decision thresholds 0.00..0.99 in 0.01 steps, recording the F1
# score at each.  (NOTE: 'theshold' is a typo for 'threshold'.)
score = []
for theshold in np.arange(0,1,.01):
    classes_x= (predict_x >= theshold).astype(int)
    ideal_score = f1_score(y_nums_test, classes_x)
    score.append(ideal_score)
# argmax gives the best threshold's *index*; dividing by 100 recovers the
# threshold value — this is coupled to the 0.01 step size above.
final = np.argmax(score)
final = final/100
print(final)
classes_x = (predict_x >= final).astype(int)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report
print(confusion_matrix(y_nums_test, classes_x))
print(classification_report(y_nums_test, classes_x))
C:\Users\cobus\AppData\Roaming\Python\Python39\site-packages\keras\engine\training_v1.py:2359: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically. updates=self.state_updates,
0.38
[[19 5]
[ 3 28]]
precision recall f1-score support
0 0.86 0.79 0.83 24
1 0.85 0.90 0.88 31
accuracy 0.85 55
macro avg 0.86 0.85 0.85 55
weighted avg 0.86 0.85 0.85 55
from lime.lime_text import LimeTextExplainer

class_names=['control','dementia']
explainer= LimeTextExplainer(class_names=class_names)

def predict_proba(arr):
    """Return an (n, 2) array of [P(control), P(dementia)] for raw texts.

    Texts are encoded with the (BERT-style) tokenizer one sentence at a
    time, padded/truncated to 250 ids, and scored by the BERT CNN-LSTM
    model, whose single sigmoid output is P(dementia).
    """
    input_ids = []
    # For every sentence...
    for sent in arr:
        encoded = tokenizer.encode_plus(
            sent,                        # Sentence to split into tokens
            add_special_tokens = True,   # Add special token '[CLS]' and '[SEP]'
            padding = 'longest',
            return_attention_mask = True, # Construct attention masks.
            return_tensors = 'pt',        # Return pytorch tensors.
        )
        flat_ids = encoded['input_ids'].reshape((-1,)).detach().numpy()
        input_ids.append(flat_ids)
    seqs = pad_sequences(input_ids, maxlen=250) # 250
    positives = model_conv.predict(seqs)
    # Pair each positive-class probability with its complement.
    return np.array([[1 - p[0], p[0]] for p in positives])
# dir(tokenizer)
print("Actual class:", y_seq_test.iloc[50])
explainer.explain_instance(X_seq_test.iloc[50], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[2])
explainer.explain_instance(X_seq_test.iloc[2], predict_proba).show_in_notebook(text=True)
Actual class: control
print("Actual class:", y_seq_test.iloc[7])
explainer.explain_instance(X_seq_test.iloc[7], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[11])
explainer.explain_instance(X_seq_test.iloc[11], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[13])
explainer.explain_instance(X_seq_test.iloc[13], predict_proba).show_in_notebook(text=True)
Actual class: dementia
print("Actual class:", y_seq_test.iloc[33])
explainer.explain_instance(X_seq_test.iloc[33], predict_proba).show_in_notebook(text=True)
Actual class: dementia
pip install -r requirements.txt --user
Note: you may need to restart the kernel to use updated packages.Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Requirement already satisfied: tqdm in c:\users\cobus\anaconda3\lib\site-packages (from -r requirements.txt (line 1)) (4.64.1)
Collecting numpy<=1.19.5
Downloading numpy-1.19.5-cp39-cp39-win_amd64.whl (13.3 MB)
--------------------------------------- 13.3/13.3 MB 25.2 MB/s eta 0:00:00
Requirement already satisfied: gensim in c:\users\cobus\appdata\roaming\python\python39\site-packages (from -r requirements.txt (line 3)) (4.3.2)
Requirement already satisfied: scikit-learn in c:\users\cobus\appdata\roaming\python\python39\site-packages (from -r requirements.txt (line 4)) (1.4.1.post1)
Requirement already satisfied: keras in c:\users\cobus\appdata\roaming\python\python39\site-packages (from -r requirements.txt (line 5)) (2.12.0)
Requirement already satisfied: tensorflow in c:\users\cobus\anaconda3\lib\site-packages (from -r requirements.txt (line 6)) (2.12.0)
Requirement already satisfied: shap in c:\users\cobus\appdata\roaming\python\python39\site-packages (from -r requirements.txt (line 7)) (0.45.0)
Requirement already satisfied: lime in c:\users\cobus\anaconda3\lib\site-packages (from -r requirements.txt (line 8)) (0.2.0.1)
Requirement already satisfied: pandas in c:\users\cobus\appdata\roaming\python\python39\site-packages (from -r requirements.txt (line 9)) (2.2.1)
Requirement already satisfied: matplotlib in c:\users\cobus\anaconda3\lib\site-packages (from -r requirements.txt (line 10)) (3.5.2)
Requirement already satisfied: nltk in c:\users\cobus\anaconda3\lib\site-packages (from -r requirements.txt (line 11)) (3.7)
Requirement already satisfied: scipy==1.7.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from -r requirements.txt (line 12)) (1.7.0)
Requirement already satisfied: jsonlines in c:\users\cobus\anaconda3\lib\site-packages (from -r requirements.txt (line 13)) (4.0.0)
Requirement already satisfied: alibi[tensorflow] in c:\users\cobus\appdata\roaming\python\python39\site-packages (from -r requirements.txt (line 14)) (0.9.5)
Requirement already satisfied: transformers in c:\users\cobus\anaconda3\lib\site-packages (from -r requirements.txt (line 15)) (4.28.1)
Requirement already satisfied: colorama in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tqdm->-r requirements.txt (line 1)) (0.4.6)
Requirement already satisfied: smart-open>=1.8.1 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from gensim->-r requirements.txt (line 3)) (6.4.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from scikit-learn->-r requirements.txt (line 4)) (3.4.0)
Requirement already satisfied: joblib>=1.2.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from scikit-learn->-r requirements.txt (line 4)) (1.3.2)
Requirement already satisfied: tensorflow-intel==2.12.0 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow->-r requirements.txt (line 6)) (2.12.0)
Requirement already satisfied: opt-einsum>=2.3.2 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (3.3.0)
Requirement already satisfied: astunparse>=1.6.0 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (1.6.3)
Requirement already satisfied: h5py>=2.9.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (3.10.0)
Requirement already satisfied: tensorflow-estimator<2.13,>=2.12.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (2.12.0)
Requirement already satisfied: tensorboard<2.13,>=2.12 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (2.12.2)
Requirement already satisfied: setuptools in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (69.2.0)
Requirement already satisfied: jax>=0.3.15 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (0.4.8)
Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (4.25.3)
Requirement already satisfied: typing-extensions>=3.6.6 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (4.5.0)
Requirement already satisfied: flatbuffers>=2.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (24.3.25)
Requirement already satisfied: absl-py>=1.0.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (2.1.0)
Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (0.31.0)
Requirement already satisfied: grpcio<2.0,>=1.24.3 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (1.54.0)
Requirement already satisfied: wrapt<1.15,>=1.11.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (1.14.1)
Requirement already satisfied: libclang>=13.0.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (18.1.1)
Requirement already satisfied: packaging in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (24.0)
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.
Collecting tensorflow
Downloading tensorflow-2.16.1-cp39-cp39-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.16.1
Downloading tensorflow_intel-2.16.1-cp39-cp39-win_amd64.whl (376.9 MB)
------------------------------------- 376.9/376.9 MB 93.0 MB/s eta 0:00:00
Requirement already satisfied: requests<3,>=2.21.0 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow-intel==2.16.1->tensorflow->-r requirements.txt (line 6)) (2.31.0)
Collecting ml-dtypes~=0.3.1
Downloading ml_dtypes-0.3.2-cp39-cp39-win_amd64.whl (127 kB)
---------------------------------------- 127.7/127.7 kB ? eta 0:00:00
Collecting keras
Downloading keras-3.1.1-py3-none-any.whl (1.1 MB)
---------------------------------------- 1.1/1.1 MB 69.9 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.15.1-cp39-cp39-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.15.1
Downloading tensorflow_intel-2.15.1-cp39-cp39-win_amd64.whl (300.8 MB)
------------------------------------- 300.8/300.8 MB 10.1 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.15.0-cp39-cp39-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.15.0
Downloading tensorflow_intel-2.15.0-cp39-cp39-win_amd64.whl (300.8 MB)
------------------------------------- 300.8/300.8 MB 27.3 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.14.1-cp39-cp39-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.14.1
Downloading tensorflow_intel-2.14.1-cp39-cp39-win_amd64.whl (284.1 MB)
-------------------------------------- 284.1/284.1 MB 5.7 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.14.0-cp39-cp39-win_amd64.whl (2.1 kB)
Collecting tensorflow-intel==2.14.0
Downloading tensorflow_intel-2.14.0-cp39-cp39-win_amd64.whl (284.1 MB)
------------------------------------ 284.1/284.1 MB 108.8 MB/s eta 0:00:00
Collecting ml-dtypes==0.2.0
Downloading ml_dtypes-0.2.0-cp39-cp39-win_amd64.whl (938 kB)
------------------------------------- 938.4/938.4 kB 58.0 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.13.1-cp39-cp39-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.13.1
Downloading tensorflow_intel-2.13.1-cp39-cp39-win_amd64.whl (276.5 MB)
------------------------------------- 276.5/276.5 MB 92.9 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.13.0-cp39-cp39-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.13.0
Downloading tensorflow_intel-2.13.0-cp39-cp39-win_amd64.whl (276.5 MB)
-------------------------------------- 276.5/276.5 MB 5.1 MB/s eta 0:00:00
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.
Collecting tensorflow
Downloading tensorflow-2.12.1-cp39-cp39-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.12.1
Downloading tensorflow_intel-2.12.1-cp39-cp39-win_amd64.whl (272.8 MB)
-------------------------------------- 272.8/272.8 MB 6.7 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.12.0-cp39-cp39-win_amd64.whl (1.9 kB)
Downloading tensorflow-2.11.1-cp39-cp39-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.11.1
Downloading tensorflow_intel-2.11.1-cp39-cp39-win_amd64.whl (266.3 MB)
-------------------------------------- 266.3/266.3 MB 5.6 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.11.0-cp39-cp39-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.11.0
Downloading tensorflow_intel-2.11.0-cp39-cp39-win_amd64.whl (266.3 MB)
------------------------------------- 266.3/266.3 MB 38.4 MB/s eta 0:00:00
Collecting tensorflow
Downloading tensorflow-2.10.1-cp39-cp39-win_amd64.whl (455.9 MB)
------------------------------------- 455.9/455.9 MB 72.5 MB/s eta 0:00:00
Downloading tensorflow-2.10.0-cp39-cp39-win_amd64.whl (455.9 MB)
------------------------------------- 455.9/455.9 MB 10.2 MB/s eta 0:00:00
Downloading tensorflow-2.9.3-cp39-cp39-win_amd64.whl (444.1 MB)
------------------------------------- 444.1/444.1 MB 15.6 MB/s eta 0:00:00
Downloading tensorflow-2.9.2-cp39-cp39-win_amd64.whl (444.1 MB)
-------------------------------------- 444.1/444.1 MB 8.7 MB/s eta 0:00:00
Downloading tensorflow-2.9.1-cp39-cp39-win_amd64.whl (444.0 MB)
-------------------------------------- 444.0/444.0 MB 8.4 MB/s eta 0:00:00
Downloading tensorflow-2.9.0-cp39-cp39-win_amd64.whl (444.0 MB)
------------------------------------- 444.0/444.0 MB 36.2 MB/s eta 0:00:00
Downloading tensorflow-2.8.4-cp39-cp39-win_amd64.whl (438.4 MB)
-------------------------------------- 438.4/438.4 MB 4.5 MB/s eta 0:00:00
Downloading tensorflow-2.8.3-cp39-cp39-win_amd64.whl (438.4 MB)
-------------------------------------- 438.4/438.4 MB 5.9 MB/s eta 0:00:00
Downloading tensorflow-2.8.2-cp39-cp39-win_amd64.whl (438.3 MB)
-------------------------------------- 438.3/438.3 MB 3.7 MB/s eta 0:00:00
Downloading tensorflow-2.8.1-cp39-cp39-win_amd64.whl (438.3 MB)
-------------------------------------- 438.3/438.3 MB 3.5 MB/s eta 0:00:00
Downloading tensorflow-2.8.0-cp39-cp39-win_amd64.whl (438.0 MB)
------------------------------------- 438.0/438.0 MB 10.4 MB/s eta 0:00:00
Downloading tensorflow-2.7.4-cp39-cp39-win_amd64.whl (436.8 MB)
-------------------------------------- 436.8/436.8 MB 8.2 MB/s eta 0:00:00
Collecting keras
Downloading keras-2.7.0-py2.py3-none-any.whl (1.3 MB)
---------------------------------------- 1.3/1.3 MB 82.6 MB/s eta 0:00:00
Collecting flatbuffers<3.0,>=1.12
Downloading flatbuffers-2.0.7-py2.py3-none-any.whl (26 kB)
Requirement already satisfied: wheel<1.0,>=0.32.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow->-r requirements.txt (line 6)) (0.43.0)
Collecting tensorflow-estimator<2.8,~=2.7.0rc0
Downloading tensorflow_estimator-2.7.0-py2.py3-none-any.whl (463 kB)
------------------------------------- 463.1/463.1 kB 28.3 MB/s eta 0:00:00
Collecting protobuf<3.20,>=3.9.2
Downloading protobuf-3.19.6-cp39-cp39-win_amd64.whl (895 kB)
------------------------------------- 895.9/895.9 kB 59.0 MB/s eta 0:00:00
Requirement already satisfied: six>=1.12.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow->-r requirements.txt (line 6)) (1.16.0)
Requirement already satisfied: termcolor>=1.1.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorflow->-r requirements.txt (line 6)) (2.4.0)
Collecting keras-preprocessing>=1.1.1
Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
---------------------------------------- 42.6/42.6 kB ? eta 0:00:00
Requirement already satisfied: gast<0.5.0,>=0.2.1 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow->-r requirements.txt (line 6)) (0.4.0)
Requirement already satisfied: google-pasta>=0.1.1 in c:\users\cobus\anaconda3\lib\site-packages (from tensorflow->-r requirements.txt (line 6)) (0.2.0)
Requirement already satisfied: cloudpickle in c:\users\cobus\anaconda3\lib\site-packages (from shap->-r requirements.txt (line 7)) (2.0.0)
Requirement already satisfied: numba in c:\users\cobus\appdata\roaming\python\python39\site-packages (from shap->-r requirements.txt (line 7)) (0.59.1)
Requirement already satisfied: slicer==0.0.7 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from shap->-r requirements.txt (line 7)) (0.0.7)
Requirement already satisfied: scikit-image>=0.12 in c:\users\cobus\anaconda3\lib\site-packages (from lime->-r requirements.txt (line 8)) (0.19.2)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from pandas->-r requirements.txt (line 9)) (2.9.0.post0)
Collecting pandas
Downloading pandas-2.2.0-cp39-cp39-win_amd64.whl (11.6 MB)
-------------------------------------- 11.6/11.6 MB 131.1 MB/s eta 0:00:00
Downloading pandas-2.1.4-cp39-cp39-win_amd64.whl (10.8 MB)
--------------------------------------- 10.8/10.8 MB 59.5 MB/s eta 0:00:00
Downloading pandas-2.1.3-cp39-cp39-win_amd64.whl (10.8 MB)
-------------------------------------- 10.8/10.8 MB 108.8 MB/s eta 0:00:00
Downloading pandas-2.1.2-cp39-cp39-win_amd64.whl (10.8 MB)
-------------------------------------- 10.8/10.8 MB 131.0 MB/s eta 0:00:00
Downloading pandas-2.1.1-cp39-cp39-win_amd64.whl (10.8 MB)
-------------------------------------- 10.8/10.8 MB 131.1 MB/s eta 0:00:00
Downloading pandas-2.1.0-cp39-cp39-win_amd64.whl (11.2 MB)
-------------------------------------- 11.2/11.2 MB 108.7 MB/s eta 0:00:00
Downloading pandas-2.0.3-cp39-cp39-win_amd64.whl (10.8 MB)
-------------------------------------- 10.8/10.8 MB 131.1 MB/s eta 0:00:00
Downloading pandas-2.0.2-cp39-cp39-win_amd64.whl (10.7 MB)
-------------------------------------- 10.7/10.7 MB 131.1 MB/s eta 0:00:00
Downloading pandas-2.0.1-cp39-cp39-win_amd64.whl (10.7 MB)
-------------------------------------- 10.7/10.7 MB 131.1 MB/s eta 0:00:00
Downloading pandas-2.0.0-cp39-cp39-win_amd64.whl (11.3 MB)
-------------------------------------- 11.3/11.3 MB 131.0 MB/s eta 0:00:00
Downloading pandas-1.5.3-cp39-cp39-win_amd64.whl (10.9 MB)
-------------------------------------- 10.9/10.9 MB 131.1 MB/s eta 0:00:00
Downloading pandas-1.5.2-cp39-cp39-win_amd64.whl (10.9 MB)
-------------------------------------- 10.9/10.9 MB 131.1 MB/s eta 0:00:00
Downloading pandas-1.5.1-cp39-cp39-win_amd64.whl (10.9 MB)
-------------------------------------- 10.9/10.9 MB 131.1 MB/s eta 0:00:00
Downloading pandas-1.5.0-cp39-cp39-win_amd64.whl (10.9 MB)
-------------------------------------- 10.9/10.9 MB 131.1 MB/s eta 0:00:00
Downloading pandas-1.4.4-cp39-cp39-win_amd64.whl (10.6 MB)
--------------------------------------- 10.6/10.6 MB 13.6 MB/s eta 0:00:00
Requirement already satisfied: pytz>=2020.1 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from pandas->-r requirements.txt (line 9)) (2024.1)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from matplotlib->-r requirements.txt (line 10)) (3.1.2)
Requirement already satisfied: cycler>=0.10 in c:\users\cobus\anaconda3\lib\site-packages (from matplotlib->-r requirements.txt (line 10)) (0.11.0)
Requirement already satisfied: pillow>=6.2.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from matplotlib->-r requirements.txt (line 10)) (10.3.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\cobus\anaconda3\lib\site-packages (from matplotlib->-r requirements.txt (line 10)) (1.4.2)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\cobus\anaconda3\lib\site-packages (from matplotlib->-r requirements.txt (line 10)) (4.25.0)
Requirement already satisfied: click in c:\users\cobus\anaconda3\lib\site-packages (from nltk->-r requirements.txt (line 11)) (8.0.4)
Requirement already satisfied: regex>=2021.8.3 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from nltk->-r requirements.txt (line 11)) (2023.12.25)
Requirement already satisfied: attrs>=19.2.0 in c:\users\cobus\anaconda3\lib\site-packages (from jsonlines->-r requirements.txt (line 13)) (21.4.0)
Requirement already satisfied: blis<0.8.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from alibi[tensorflow]->-r requirements.txt (line 14)) (0.7.11)
Requirement already satisfied: pydantic<2.0.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from alibi[tensorflow]->-r requirements.txt (line 14)) (1.10.15)
Requirement already satisfied: dill<0.4.0,>=0.3.0 in c:\users\cobus\anaconda3\lib\site-packages (from alibi[tensorflow]->-r requirements.txt (line 14)) (0.3.4)
Requirement already satisfied: spacy[lookups]<4.0.0,>=2.0.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from alibi[tensorflow]->-r requirements.txt (line 14)) (3.7.4)
Requirement already satisfied: filelock in c:\users\cobus\anaconda3\lib\site-packages (from transformers->-r requirements.txt (line 15)) (3.6.0)
Requirement already satisfied: pyyaml>=5.1 in c:\users\cobus\anaconda3\lib\site-packages (from transformers->-r requirements.txt (line 15)) (6.0)
Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in c:\users\cobus\anaconda3\lib\site-packages (from transformers->-r requirements.txt (line 15)) (0.13.3)
Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in c:\users\cobus\anaconda3\lib\site-packages (from transformers->-r requirements.txt (line 15)) (0.14.1)
Requirement already satisfied: fsspec in c:\users\cobus\anaconda3\lib\site-packages (from huggingface-hub<1.0,>=0.11.0->transformers->-r requirements.txt (line 15)) (2022.7.1)
Requirement already satisfied: idna<4,>=2.5 in c:\users\cobus\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow->-r requirements.txt (line 6)) (3.3)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\cobus\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow->-r requirements.txt (line 6)) (2.0.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow->-r requirements.txt (line 6)) (2.2.1)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\cobus\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow-intel==2.16.1->tensorflow->-r requirements.txt (line 6)) (2022.12.7)
Requirement already satisfied: PyWavelets>=1.1.1 in c:\users\cobus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime->-r requirements.txt (line 8)) (1.3.0)
Requirement already satisfied: tifffile>=2019.7.26 in c:\users\cobus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime->-r requirements.txt (line 8)) (2021.7.2)
Requirement already satisfied: networkx>=2.2 in c:\users\cobus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime->-r requirements.txt (line 8)) (2.8.4)
Requirement already satisfied: imageio>=2.4.1 in c:\users\cobus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime->-r requirements.txt (line 8)) (2.19.3)
Requirement already satisfied: jinja2 in c:\users\cobus\anaconda3\lib\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (2.11.3)
Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (1.1.2)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (1.0.10)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (2.0.8)
Requirement already satisfied: srsly<3.0.0,>=2.4.3 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (2.4.8)
Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (1.0.5)
Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (2.0.10)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (3.0.9)
Requirement already satisfied: weasel<0.4.0,>=0.1.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (0.3.4)
Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (3.0.12)
Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (3.3.0)
Requirement already satisfied: thinc<8.3.0,>=8.2.2 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (8.2.3)
Requirement already satisfied: typer<0.10.0,>=0.3.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (0.9.4)
Requirement already satisfied: spacy-lookups-data<1.1.0,>=1.0.3 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (1.0.5)
Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in c:\users\cobus\anaconda3\lib\site-packages (from tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (1.8.1)
Requirement already satisfied: werkzeug>=1.0.1 in c:\users\cobus\anaconda3\lib\site-packages (from tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (2.0.3)
Requirement already satisfied: google-auth<3,>=1.6.3 in c:\users\cobus\anaconda3\lib\site-packages (from tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (2.17.3)
Requirement already satisfied: markdown>=2.6.8 in c:\users\cobus\anaconda3\lib\site-packages (from tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (3.3.4)
Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (0.7.2)
Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in c:\users\cobus\anaconda3\lib\site-packages (from tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (1.0.0)
Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from numba->shap->-r requirements.txt (line 7)) (0.42.0)
Collecting numba
Downloading numba-0.59.0-cp39-cp39-win_amd64.whl (2.6 MB)
---------------------------------------- 2.6/2.6 MB 85.0 MB/s eta 0:00:00
Downloading numba-0.58.1-cp39-cp39-win_amd64.whl (2.6 MB)
---------------------------------------- 2.6/2.6 MB 84.4 MB/s eta 0:00:00
Collecting llvmlite<0.42,>=0.41.0dev0
Downloading llvmlite-0.41.1-cp39-cp39-win_amd64.whl (28.1 MB)
--------------------------------------- 28.1/28.1 MB 72.5 MB/s eta 0:00:00
Collecting numba
Downloading numba-0.58.0-cp39-cp39-win_amd64.whl (2.6 MB)
---------------------------------------- 2.6/2.6 MB 23.8 MB/s eta 0:00:00
Downloading numba-0.57.1-cp39-cp39-win_amd64.whl (2.5 MB)
---------------------------------------- 2.5/2.5 MB 14.8 MB/s eta 0:00:00
Downloading numba-0.57.0-cp39-cp39-win_amd64.whl (2.6 MB)
---------------------------------------- 2.6/2.6 MB 14.8 MB/s eta 0:00:00
Downloading numba-0.56.4-cp39-cp39-win_amd64.whl (2.5 MB)
---------------------------------------- 2.5/2.5 MB 78.9 MB/s eta 0:00:00
Collecting llvmlite<0.40,>=0.39.0dev0
Downloading llvmlite-0.39.1-cp39-cp39-win_amd64.whl (23.2 MB)
--------------------------------------- 23.2/23.2 MB 93.8 MB/s eta 0:00:00
Requirement already satisfied: rsa<5,>=3.1.4 in c:\users\cobus\anaconda3\lib\site-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (4.9)
Requirement already satisfied: cachetools<6.0,>=2.0.0 in c:\users\cobus\anaconda3\lib\site-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (5.3.0)
Requirement already satisfied: pyasn1-modules>=0.2.1 in c:\users\cobus\anaconda3\lib\site-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (0.2.8)
Requirement already satisfied: requests-oauthlib>=0.7.0 in c:\users\cobus\anaconda3\lib\site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (1.3.1)
Requirement already satisfied: confection<1.0.0,>=0.0.1 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from thinc<8.3.0,>=8.2.2->spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (0.1.4)
Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from weasel<0.4.0,>=0.1.0->spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (0.16.0)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\cobus\anaconda3\lib\site-packages (from jinja2->spacy[lookups]<4.0.0,>=2.0.0->alibi[tensorflow]->-r requirements.txt (line 14)) (2.0.1)
Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (0.4.8)
Requirement already satisfied: oauthlib>=3.0.0 in c:\users\cobus\appdata\roaming\python\python39\site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow-intel==2.12.0->tensorflow->-r requirements.txt (line 6)) (3.2.2)
Installing collected packages: tensorflow-estimator, keras, flatbuffers, protobuf, numpy, llvmlite, pandas, numba, keras-preprocessing, tensorflow
Attempting uninstall: tensorflow-estimator
Found existing installation: tensorflow-estimator 2.12.0
Uninstalling tensorflow-estimator-2.12.0:
Successfully uninstalled tensorflow-estimator-2.12.0
Attempting uninstall: keras
Found existing installation: keras 2.12.0
Uninstalling keras-2.12.0:
Successfully uninstalled keras-2.12.0
Attempting uninstall: flatbuffers
Found existing installation: flatbuffers 24.3.25
Uninstalling flatbuffers-24.3.25:
Successfully uninstalled flatbuffers-24.3.25
Attempting uninstall: protobuf
Found existing installation: protobuf 4.25.3
Uninstalling protobuf-4.25.3:
Successfully uninstalled protobuf-4.25.3
WARNING: Ignoring invalid distribution -umpy (c:\users\cobus\anaconda3\lib\site-packages)
WARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))': /simple/flatbuffers/
WARNING: Ignoring invalid distribution -umpy (c:\users\cobus\anaconda3\lib\site-packages)
ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\cobus\\AppData\\Roaming\\Python\\Python39\\site-packages\\google\\~upb\\_message.cp39-win_amd64.pyd'
Check the permissions.
WARNING: Ignoring invalid distribution -umpy (c:\users\cobus\anaconda3\lib\site-packages)
WARNING: Ignoring invalid distribution -umpy (c:\users\cobus\anaconda3\lib\site-packages)
# Import necessary modules
import tensorflow as tf
import numpy as np
import pandas as pd
import re
from alibi.explainers import IntegratedGradients
import matplotlib as mpl
# Restore the trained doc-level LSTM weights from the local checkpoint.
# NOTE(review): absolute Windows path — assumes this exact directory layout.
weights_path = r'C:\Users\cobus\Desktop\Thesis\Best Models\Best Models\doc_lstm'
model.load_weights(weights_path)
<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x16d87041e80>
# Extract the model's first layer to attribute against.
# NOTE(review): despite the original note calling this a "transformer block",
# the printed repr below shows it is actually a keras Embedding layer —
# Integrated Gradients will be computed w.r.t. the embedding output.
bl = model.layers[0]
print(bl)
<keras.layers.core.embedding.Embedding object at 0x0000016DF5E6CB50>
# Configure the Integrated Gradients explainer (alibi).
# n_steps: number of interpolation points between baseline and input;
# "gausslegendre": quadrature rule used to approximate the path integral;
# internal_batch_size: batch size used internally when evaluating the path.
n_steps = 20
method = "gausslegendre"
internal_batch_size = 25

ig = IntegratedGradients(
    model,
    layer=bl,  # attribute w.r.t. the first (embedding) layer's output
    n_steps=n_steps,
    method=method,
    internal_batch_size=internal_batch_size,
)
# Display the tokenized, zero-padded test matrix (notebook cell output below).
Xwords_test
array([[ 0, 0, 0, ..., 390, 62, 1499],
[ 0, 0, 0, ..., 27, 1, 490],
[ 58, 72, 39, ..., 6, 84, 27],
...,
[ 0, 0, 0, ..., 7, 10, 15],
[ 0, 0, 0, ..., 2, 82, 100],
[ 0, 0, 0, ..., 84, 126, 10]])
# Hard class labels for the test set: round the model's (presumably sigmoid)
# outputs to 0/1 and flatten to a 1-D int vector — TODO confirm output activation.
predictions = np.around(model.predict(Xwords_test)).reshape(-1).astype(int)

# Attribute each test sequence towards its predicted class.
# baselines=None lets alibi use its default (zero) baseline.
# NOTE(review): name is misspelled ("explanaition") — kept as-is to avoid
# breaking any downstream references outside this view.
explanaition = ig.explain(
    Xwords_test,
    baselines=None,
    target=predictions,
)